AmosLewis · May 1, 2024 00:38
diff --git a/resnet50_vaiq_int8.default.onnx.linalg.mlir b/resnet50_vaiq_int8.default.onnx.linalg.mlir
 // Affine-map aliases referenced by name from the `indexing_maps` attributes of
 // the linalg.generic ops in this module. Each map translates a point of the
 // op's iteration space (d0, d1, ...) into the index used to access one operand.

 // 4-D: ignores d0 and always uses constant index 0 in the first result
 // position; d1..d3 pass through. In the visible part of @torch_jit it is the
 // input map of the generic ops over the 1x3x224x224 tensors.
 #map = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
 // 4-D identity: every iteration index is used unchanged. Used as the output
 // map of the 4-D generic ops (and as the input map for the 64x3x7x7 ones).
 #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 // 1-D identity. Used for both operands of the generic ops over tensor<64x...>.
 #map2 = affine_map<(d0) -> (d0)>
 // NOTE(review): #map3 through #map10 are not referenced in the portion of the
 // function visible here; their uses are presumably further down -- verify.
 // 4-D iteration space -> 1-D index that depends only on d1.
 #map3 = affine_map<(d0, d1, d2, d3) -> (d1)>
 // 4-D: keeps d0 and d1, constant 0 in the last two result positions.
 #map4 = affine_map<(d0, d1, d2, d3) -> (d0, d1, 0, 0)>
 // 4-D: keeps only d1; constant 0 in the first and the last two positions.
 #map5 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>
 // 4-D iteration space -> empty result list (zero-rank operand access).
 #map6 = affine_map<(d0, d1, d2, d3) -> ()>
 // 2-D identity.
 #map7 = affine_map<(d0, d1) -> (d0, d1)>
 // 2-D with the two indices swapped.
 #map8 = affine_map<(d0, d1) -> (d1, d0)>
 // 2-D: constant 0 in the first result position, d1 in the second.
 #map9 = affine_map<(d0, d1) -> (0, d1)>
 // 2-D iteration space -> 1-D index that depends only on d1.
 #map10 = affine_map<(d0, d1) -> (d1)>
 module {
  func.func @torch_jit(%arg0: !torch.vtensor<[1,3,224,224],f32>) -> !torch.vtensor<[1,1000],f32> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.13.1"} {
    %0 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[1,3,224,224],f32> -> tensor<1x3x224x224xf32>
    %1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x3x7x7xf32>) : !torch.vtensor<[64,3,7,7],f32>
    %2 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[64,3,7,7],f32> -> tensor<64x3x7x7xf32>
    %3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %4 = torch_c.to_builtin_tensor %3 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x1x1xf32>) : !torch.vtensor<[64,64,1,1],f32>
    %6 = torch_c.to_builtin_tensor %5 : !torch.vtensor<[64,64,1,1],f32> -> tensor<64x64x1x1xf32>
    %7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %8 = torch_c.to_builtin_tensor %7 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
    %10 = torch_c.to_builtin_tensor %9 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
    %11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %12 = torch_c.to_builtin_tensor %11 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
    %14 = torch_c.to_builtin_tensor %13 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
    %15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %16 = torch_c.to_builtin_tensor %15 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
    %18 = torch_c.to_builtin_tensor %17 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
    %19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %20 = torch_c.to_builtin_tensor %19 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x256x1x1xf32>) : !torch.vtensor<[64,256,1,1],f32>
    %22 = torch_c.to_builtin_tensor %21 : !torch.vtensor<[64,256,1,1],f32> -> tensor<64x256x1x1xf32>
    %23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %24 = torch_c.to_builtin_tensor %23 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
    %26 = torch_c.to_builtin_tensor %25 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
    %27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %28 = torch_c.to_builtin_tensor %27 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
    %30 = torch_c.to_builtin_tensor %29 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
    %31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %32 = torch_c.to_builtin_tensor %31 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x256x1x1xf32>) : !torch.vtensor<[64,256,1,1],f32>
    %34 = torch_c.to_builtin_tensor %33 : !torch.vtensor<[64,256,1,1],f32> -> tensor<64x256x1x1xf32>
    %35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %36 = torch_c.to_builtin_tensor %35 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64x64x3x3xf32>) : !torch.vtensor<[64,64,3,3],f32>
    %38 = torch_c.to_builtin_tensor %37 : !torch.vtensor<[64,64,3,3],f32> -> tensor<64x64x3x3xf32>
    %39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<64xf32>) : !torch.vtensor<[64],f32>
    %40 = torch_c.to_builtin_tensor %39 : !torch.vtensor<[64],f32> -> tensor<64xf32>
    %41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x64x1x1xf32>) : !torch.vtensor<[256,64,1,1],f32>
    %42 = torch_c.to_builtin_tensor %41 : !torch.vtensor<[256,64,1,1],f32> -> tensor<256x64x1x1xf32>
    %43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %44 = torch_c.to_builtin_tensor %43 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x256x1x1xf32>) : !torch.vtensor<[128,256,1,1],f32>
    %46 = torch_c.to_builtin_tensor %45 : !torch.vtensor<[128,256,1,1],f32> -> tensor<128x256x1x1xf32>
    %47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %48 = torch_c.to_builtin_tensor %47 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
    %50 = torch_c.to_builtin_tensor %49 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
    %51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %52 = torch_c.to_builtin_tensor %51 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
    %54 = torch_c.to_builtin_tensor %53 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
    %55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %56 = torch_c.to_builtin_tensor %55 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x256x1x1xf32>) : !torch.vtensor<[512,256,1,1],f32>
    %58 = torch_c.to_builtin_tensor %57 : !torch.vtensor<[512,256,1,1],f32> -> tensor<512x256x1x1xf32>
    %59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %60 = torch_c.to_builtin_tensor %59 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
    %62 = torch_c.to_builtin_tensor %61 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
    %63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %64 = torch_c.to_builtin_tensor %63 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
    %66 = torch_c.to_builtin_tensor %65 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
    %67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %68 = torch_c.to_builtin_tensor %67 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
    %70 = torch_c.to_builtin_tensor %69 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
    %71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %72 = torch_c.to_builtin_tensor %71 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
    %74 = torch_c.to_builtin_tensor %73 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
    %75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %76 = torch_c.to_builtin_tensor %75 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
    %78 = torch_c.to_builtin_tensor %77 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
    %79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %80 = torch_c.to_builtin_tensor %79 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
    %82 = torch_c.to_builtin_tensor %81 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
    %83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %84 = torch_c.to_builtin_tensor %83 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x512x1x1xf32>) : !torch.vtensor<[128,512,1,1],f32>
    %86 = torch_c.to_builtin_tensor %85 : !torch.vtensor<[128,512,1,1],f32> -> tensor<128x512x1x1xf32>
    %87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %88 = torch_c.to_builtin_tensor %87 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128x128x3x3xf32>) : !torch.vtensor<[128,128,3,3],f32>
    %90 = torch_c.to_builtin_tensor %89 : !torch.vtensor<[128,128,3,3],f32> -> tensor<128x128x3x3xf32>
    %91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<128xf32>) : !torch.vtensor<[128],f32>
    %92 = torch_c.to_builtin_tensor %91 : !torch.vtensor<[128],f32> -> tensor<128xf32>
    %93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x128x1x1xf32>) : !torch.vtensor<[512,128,1,1],f32>
    %94 = torch_c.to_builtin_tensor %93 : !torch.vtensor<[512,128,1,1],f32> -> tensor<512x128x1x1xf32>
    %95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %96 = torch_c.to_builtin_tensor %95 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x512x1x1xf32>) : !torch.vtensor<[256,512,1,1],f32>
    %98 = torch_c.to_builtin_tensor %97 : !torch.vtensor<[256,512,1,1],f32> -> tensor<256x512x1x1xf32>
    %99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %100 = torch_c.to_builtin_tensor %99 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %102 = torch_c.to_builtin_tensor %101 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %104 = torch_c.to_builtin_tensor %103 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %106 = torch_c.to_builtin_tensor %105 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %108 = torch_c.to_builtin_tensor %107 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x512x1x1xf32>) : !torch.vtensor<[1024,512,1,1],f32>
    %110 = torch_c.to_builtin_tensor %109 : !torch.vtensor<[1024,512,1,1],f32> -> tensor<1024x512x1x1xf32>
    %111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %112 = torch_c.to_builtin_tensor %111 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
    %114 = torch_c.to_builtin_tensor %113 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
    %115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %116 = torch_c.to_builtin_tensor %115 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %118 = torch_c.to_builtin_tensor %117 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %120 = torch_c.to_builtin_tensor %119 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %122 = torch_c.to_builtin_tensor %121 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %124 = torch_c.to_builtin_tensor %123 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
    %126 = torch_c.to_builtin_tensor %125 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
    %127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %128 = torch_c.to_builtin_tensor %127 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %130 = torch_c.to_builtin_tensor %129 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %132 = torch_c.to_builtin_tensor %131 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %134 = torch_c.to_builtin_tensor %133 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %136 = torch_c.to_builtin_tensor %135 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
    %138 = torch_c.to_builtin_tensor %137 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
    %139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %140 = torch_c.to_builtin_tensor %139 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %142 = torch_c.to_builtin_tensor %141 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %144 = torch_c.to_builtin_tensor %143 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %146 = torch_c.to_builtin_tensor %145 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %148 = torch_c.to_builtin_tensor %147 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
    %150 = torch_c.to_builtin_tensor %149 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
    %151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %152 = torch_c.to_builtin_tensor %151 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %154 = torch_c.to_builtin_tensor %153 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %156 = torch_c.to_builtin_tensor %155 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %158 = torch_c.to_builtin_tensor %157 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %160 = torch_c.to_builtin_tensor %159 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x1024x1x1xf32>) : !torch.vtensor<[256,1024,1,1],f32>
    %162 = torch_c.to_builtin_tensor %161 : !torch.vtensor<[256,1024,1,1],f32> -> tensor<256x1024x1x1xf32>
    %163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %164 = torch_c.to_builtin_tensor %163 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256x256x3x3xf32>) : !torch.vtensor<[256,256,3,3],f32>
    %166 = torch_c.to_builtin_tensor %165 : !torch.vtensor<[256,256,3,3],f32> -> tensor<256x256x3x3xf32>
    %167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<256xf32>) : !torch.vtensor<[256],f32>
    %168 = torch_c.to_builtin_tensor %167 : !torch.vtensor<[256],f32> -> tensor<256xf32>
    %169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x256x1x1xf32>) : !torch.vtensor<[1024,256,1,1],f32>
    %170 = torch_c.to_builtin_tensor %169 : !torch.vtensor<[1024,256,1,1],f32> -> tensor<1024x256x1x1xf32>
    %171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf32>) : !torch.vtensor<[1024],f32>
    %172 = torch_c.to_builtin_tensor %171 : !torch.vtensor<[1024],f32> -> tensor<1024xf32>
    %173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x1024x1x1xf32>) : !torch.vtensor<[512,1024,1,1],f32>
    %174 = torch_c.to_builtin_tensor %173 : !torch.vtensor<[512,1024,1,1],f32> -> tensor<512x1024x1x1xf32>
    %175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %176 = torch_c.to_builtin_tensor %175 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
    %178 = torch_c.to_builtin_tensor %177 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
    %179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %180 = torch_c.to_builtin_tensor %179 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
    %182 = torch_c.to_builtin_tensor %181 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
    %183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
    %184 = torch_c.to_builtin_tensor %183 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
    %185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x1024x1x1xf32>) : !torch.vtensor<[2048,1024,1,1],f32>
    %186 = torch_c.to_builtin_tensor %185 : !torch.vtensor<[2048,1024,1,1],f32> -> tensor<2048x1024x1x1xf32>
    %187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
    %188 = torch_c.to_builtin_tensor %187 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
    %189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x2048x1x1xf32>) : !torch.vtensor<[512,2048,1,1],f32>
    %190 = torch_c.to_builtin_tensor %189 : !torch.vtensor<[512,2048,1,1],f32> -> tensor<512x2048x1x1xf32>
    %191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %192 = torch_c.to_builtin_tensor %191 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
    %194 = torch_c.to_builtin_tensor %193 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
    %195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %196 = torch_c.to_builtin_tensor %195 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
    %198 = torch_c.to_builtin_tensor %197 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
    %199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
    %200 = torch_c.to_builtin_tensor %199 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
    %201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x2048x1x1xf32>) : !torch.vtensor<[512,2048,1,1],f32>
    %202 = torch_c.to_builtin_tensor %201 : !torch.vtensor<[512,2048,1,1],f32> -> tensor<512x2048x1x1xf32>
    %203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %204 = torch_c.to_builtin_tensor %203 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512x3x3xf32>) : !torch.vtensor<[512,512,3,3],f32>
    %206 = torch_c.to_builtin_tensor %205 : !torch.vtensor<[512,512,3,3],f32> -> tensor<512x512x3x3xf32>
    %207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
    %208 = torch_c.to_builtin_tensor %207 : !torch.vtensor<[512],f32> -> tensor<512xf32>
    %209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512x1x1xf32>) : !torch.vtensor<[2048,512,1,1],f32>
    %210 = torch_c.to_builtin_tensor %209 : !torch.vtensor<[2048,512,1,1],f32> -> tensor<2048x512x1x1xf32>
    %211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048xf32>) : !torch.vtensor<[2048],f32>
    %212 = torch_c.to_builtin_tensor %211 : !torch.vtensor<[2048],f32> -> tensor<2048xf32>
    %213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1000x2048xf32>) : !torch.vtensor<[1000,2048],f32>
    %214 = torch_c.to_builtin_tensor %213 : !torch.vtensor<[1000,2048],f32> -> tensor<1000x2048xf32>
    %215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1000xf32>) : !torch.vtensor<[1000],f32>
    %216 = torch_c.to_builtin_tensor %215 : !torch.vtensor<[1000],f32> -> tensor<1000xf32>
    %none = torch.constant.none
    %217 = torch.vtensor.literal(dense<6.250000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %218 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12 = torch.constant.int 12
    %219 = torch.aten.item %217 : !torch.vtensor<[],f32> -> !torch.float
    %220 = torch_c.to_f64 %219
    %221 = torch.aten.item %218 : !torch.vtensor<[],si8> -> !torch.int
    %222 = torch_c.to_i64 %221
    %c1 = arith.constant 1 : index
    %c1_0 = arith.constant 1 : index
    %c3 = arith.constant 3 : index
    %c2 = arith.constant 2 : index
    %c224 = arith.constant 224 : index
    %c3_1 = arith.constant 3 : index
    %c224_2 = arith.constant 224 : index
    %223 = tensor.empty() : tensor<1x3x224x224xi8>
    %224 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<1x3x224x224xf32>) outs(%223 : tensor<1x3x224x224xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %221
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %219
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x3x224x224xi8>
    %cast = tensor.cast %224 : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
    %cast_3 = tensor.cast %cast : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
    %225 = torch.vtensor.literal(dense<6.250000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %226 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %227 = torch.aten.item %225 : !torch.vtensor<[],f32> -> !torch.float
    %228 = torch_c.to_f64 %227
    %229 = torch.aten.item %226 : !torch.vtensor<[],si8> -> !torch.int
    %230 = torch_c.to_i64 %229
    %cast_4 = tensor.cast %cast_3 : tensor<1x3x224x224xi8> to tensor<1x3x224x224xi8>
    %c1_5 = arith.constant 1 : index
    %c1_6 = arith.constant 1 : index
    %c3_7 = arith.constant 3 : index
    %c2_8 = arith.constant 2 : index
    %c224_9 = arith.constant 224 : index
    %c3_10 = arith.constant 3 : index
    %c224_11 = arith.constant 224 : index
    %231 = tensor.empty() : tensor<1x3x224x224xf32>
    %232 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4 : tensor<1x3x224x224xi8>) outs(%231 : tensor<1x3x224x224xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %229
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %227
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x3x224x224xf32>
    %cast_12 = tensor.cast %232 : tensor<1x3x224x224xf32> to tensor<1x3x224x224xf32>
    %233 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %234 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_13 = torch.constant.int 12
    %235 = torch.aten.item %233 : !torch.vtensor<[],f32> -> !torch.float
    %236 = torch_c.to_f64 %235
    %237 = torch.aten.item %234 : !torch.vtensor<[],si8> -> !torch.int
    %238 = torch_c.to_i64 %237
    %c1_14 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c64 = arith.constant 64 : index
    %c1_15 = arith.constant 1 : index
    %c3_16 = arith.constant 3 : index
    %c2_17 = arith.constant 2 : index
    %c7 = arith.constant 7 : index
    %c3_18 = arith.constant 3 : index
    %c7_19 = arith.constant 7 : index
    %239 = tensor.empty() : tensor<64x3x7x7xi8>
    %240 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x3x7x7xf32>) outs(%239 : tensor<64x3x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %237
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %235
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x3x7x7xi8>
    %cast_20 = tensor.cast %240 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
    %cast_21 = tensor.cast %cast_20 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
    %241 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %242 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %243 = torch.aten.item %241 : !torch.vtensor<[],f32> -> !torch.float
    %244 = torch_c.to_f64 %243
    %245 = torch.aten.item %242 : !torch.vtensor<[],si8> -> !torch.int
    %246 = torch_c.to_i64 %245
    %cast_22 = tensor.cast %cast_21 : tensor<64x3x7x7xi8> to tensor<64x3x7x7xi8>
    %c1_23 = arith.constant 1 : index
    %c0_24 = arith.constant 0 : index
    %c64_25 = arith.constant 64 : index
    %c1_26 = arith.constant 1 : index
    %c3_27 = arith.constant 3 : index
    %c2_28 = arith.constant 2 : index
    %c7_29 = arith.constant 7 : index
    %c3_30 = arith.constant 3 : index
    %c7_31 = arith.constant 7 : index
    %247 = tensor.empty() : tensor<64x3x7x7xf32>
    %248 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_22 : tensor<64x3x7x7xi8>) outs(%247 : tensor<64x3x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %245
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %243
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64x3x7x7xf32>
    %cast_32 = tensor.cast %248 : tensor<64x3x7x7xf32> to tensor<64x3x7x7xf32>
    // Quantize the 64-element f32 tensor %4 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0 from the literals below.
    %249 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %250 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_33 = torch.constant.int 12
    %251 = torch.aten.item %249 : !torch.vtensor<[],f32> -> !torch.float
    %252 = torch_c.to_f64 %251
    %253 = torch.aten.item %250 : !torch.vtensor<[],si8> -> !torch.int
    %254 = torch_c.to_i64 %253
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_34 = arith.constant 1 : index
    %c0_35 = arith.constant 0 : index
    %c64_36 = arith.constant 64 : index
    %255 = tensor.empty() : tensor<64xi8>
    %256 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%4 : tensor<64xf32>) outs(%255 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %253
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %251
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_37 = tensor.cast %256 : tensor<64xi8> to tensor<64xi8>
    %cast_38 = tensor.cast %cast_37 : tensor<64xi8> to tensor<64xi8>
    // Dequantize the i8 tensor %cast_38 back to f32:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0 from the literals below.
    %257 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %258 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %259 = torch.aten.item %257 : !torch.vtensor<[],f32> -> !torch.float
    %260 = torch_c.to_f64 %259
    %261 = torch.aten.item %258 : !torch.vtensor<[],si8> -> !torch.int
    %262 = torch_c.to_i64 %261
    %cast_39 = tensor.cast %cast_38 : tensor<64xi8> to tensor<64xi8>
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_40 = arith.constant 1 : index
    %c0_41 = arith.constant 0 : index
    %c64_42 = arith.constant 64 : index
    %263 = tensor.empty() : tensor<64xf32>
    %264 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_39 : tensor<64xi8>) outs(%263 : tensor<64xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %261
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale arrives as a torch float (f64) and is narrowed to f32.
      %5779 = torch_c.to_f64 %259
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64xf32>
    // Same-type cast. %cast_43 is broadcast along the channel dimension to
    // initialise the output of the convolution below.
    %cast_43 = tensor.cast %264 : tensor<64xf32> to tensor<64xf32>
    // 2-D convolution of %cast_12 (1x3x224x224, defined before this excerpt) with
    // the 64x3x7x7 filter %cast_32, initialised with the per-channel values in
    // %cast_43. The result is cast to 1x64x112x112.
    //
    // Torch-level hyper-parameters. Judging from their uses below, 3 is the
    // spatial padding, while 1 and 2 match the dilations and strides attributes of
    // the conv op. The list values %265-%268 are not referenced in the visible
    // portion.
    %int3 = torch.constant.int 3
    %int3_44 = torch.constant.int 3
    %int1 = torch.constant.int 1
    %int1_45 = torch.constant.int 1
    %int2 = torch.constant.int 2
    %int2_46 = torch.constant.int 2
    %int0 = torch.constant.int 0
    %265 = torch.prim.ListConstruct %int3, %int3_44 : (!torch.int, !torch.int) -> !torch.list<int>
    %266 = torch.prim.ListConstruct %int1, %int1_45 : (!torch.int, !torch.int) -> !torch.list<int>
    %267 = torch.prim.ListConstruct %int2, %int2_46 : (!torch.int, !torch.int) -> !torch.list<int>
    %268 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
    %false = torch.constant.bool false
    // %269 is the group count (1); it feeds the divisibility asserts below.
    %int1_47 = torch.constant.int 1
    %269 = torch_c.to_i64 %int1_47
    // %270 / %271 are the padding amounts for the two spatial dimensions.
    %270 = torch_c.to_i64 %int3
    %271 = torch_c.to_i64 %int3_44
    %272 = torch_c.to_i64 %int0
    %273 = torch_c.to_i64 %int0
    // Index constants emitted by the lowering. Only some of them (%c3_51,
    // %c224_53, %c224_55, %c64_57, %c7_61, %c7_63) are used below.
    %c0_48 = arith.constant 0 : index
    %c1_49 = arith.constant 1 : index
    %c1_50 = arith.constant 1 : index
    %c3_51 = arith.constant 3 : index
    %c2_52 = arith.constant 2 : index
    %c224_53 = arith.constant 224 : index
    %c3_54 = arith.constant 3 : index
    %c224_55 = arith.constant 224 : index
    %c0_56 = arith.constant 0 : index
    %c64_57 = arith.constant 64 : index
    %c1_58 = arith.constant 1 : index
    %c3_59 = arith.constant 3 : index
    %c2_60 = arith.constant 2 : index
    %c7_61 = arith.constant 7 : index
    %c3_62 = arith.constant 3 : index
    %c7_63 = arith.constant 7 : index
    // Runtime checks: 3 (input channels) and 64 (filter count) must both be
    // divisible by the group count.
    %274 = arith.index_cast %269 : i64 to index
    %c0_64 = arith.constant 0 : index
    %275 = arith.remsi %c3_51, %274 : index
    %276 = arith.cmpi eq, %c0_64, %275 : index
    cf.assert %276, "invalid: groups must divide input channel size evenly."
    %c0_65 = arith.constant 0 : index
    %277 = arith.remsi %c64_57, %274 : index
    %278 = arith.cmpi eq, %c0_65, %277 : index
    cf.assert %278, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: %c1_i64 and
    // %c1_i64_66 multiply (kernel - 1), %c2_i64 and %c2_i64_67 are the divisors.
    %c1_i64 = arith.constant 1 : i64
    %c1_i64_66 = arith.constant 1 : i64
    %c2_i64 = arith.constant 2 : i64
    %c2_i64_67 = arith.constant 2 : i64
    // Value written into the padded border.
    %cst = arith.constant 0.000000e+00 : f32
    %c0_68 = arith.constant 0 : index
    %c1_69 = arith.constant 1 : index
    %c1_70 = arith.constant 1 : index
    %c3_71 = arith.constant 3 : index
    %c2_72 = arith.constant 2 : index
    %c224_73 = arith.constant 224 : index
    %c3_74 = arith.constant 3 : index
    %c224_75 = arith.constant 224 : index
    %c0_i64 = arith.constant 0 : i64
    // Pad the two trailing (spatial) dimensions by 3 on each side with 0.0. The
    // leading two dimensions get zero padding. The result type is fully dynamic.
    %279 = arith.index_cast %c0_i64 : i64 to index
    %280 = arith.index_cast %c0_i64 : i64 to index
    %281 = arith.index_cast %270 : i64 to index
    %282 = arith.index_cast %271 : i64 to index
    %padded = tensor.pad %cast_12 low[%279, %280, %281, %282] high[%279, %280, %281, %282] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst : f32
    } : tensor<1x3x224x224xf32> to tensor<?x?x?x?xf32>
    // Output extent of the first spatial dimension:
    //   floor((224 + 2*3 - 1*(7 - 1) - 1) / 2) + 1 = 112
    %283 = arith.index_cast %c7_61 : index to i64
    %c1_i64_76 = arith.constant 1 : i64
    %c2_i64_77 = arith.constant 2 : i64
    %284 = arith.muli %270, %c2_i64_77 : i64
    %285 = arith.index_cast %c224_53 : index to i64
    %286 = arith.addi %285, %284 : i64
    %287 = arith.subi %283, %c1_i64_76 : i64
    %288 = arith.muli %c1_i64, %287 : i64
    %289 = arith.subi %286, %288 : i64
    %290 = arith.subi %289, %c1_i64_76 : i64
    %291 = arith.floordivsi %290, %c2_i64 : i64
    %292 = arith.addi %291, %c1_i64_76 : i64
    %293 = arith.index_cast %292 : i64 to index
    // Same computation for the second spatial dimension.
    %294 = arith.index_cast %c7_63 : index to i64
    %c1_i64_78 = arith.constant 1 : i64
    %c2_i64_79 = arith.constant 2 : i64
    %295 = arith.muli %271, %c2_i64_79 : i64
    %296 = arith.index_cast %c224_55 : index to i64
    %297 = arith.addi %296, %295 : i64
    %298 = arith.subi %294, %c1_i64_78 : i64
    %299 = arith.muli %c1_i64_66, %298 : i64
    %300 = arith.subi %297, %299 : i64
    %301 = arith.subi %300, %c1_i64_78 : i64
    %302 = arith.floordivsi %301, %c2_i64_67 : i64
    %303 = arith.addi %302, %c1_i64_78 : i64
    %304 = arith.index_cast %303 : i64 to index
    // Broadcast the 64 per-channel values of %cast_43 along dimension 1 (#map3)
    // into the output-shaped tensor that the convolution uses as its outs operand.
    %305 = tensor.empty(%293, %304) : tensor<1x64x?x?xf32>
    %306 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_43 : tensor<64xf32>) outs(%305 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // Per-group channel counts; not referenced in the visible portion.
    %307 = arith.floordivsi %c3_51, %274 : index
    %308 = arith.floordivsi %c64_57, %274 : index
    %c0_80 = arith.constant 0 : index
    %c1_81 = arith.constant 1 : index
    %309 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded, %cast_32 : tensor<?x?x?x?xf32>, tensor<64x3x7x7xf32>) outs(%306 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Refine the dynamically shaped result to its static shape.
    %cast_82 = tensor.cast %309 : tensor<1x64x?x?xf32> to tensor<1x64x112x112xf32>
    // Element-wise ReLU over %cast_82: out = in > 0 ? in : 0.
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_83 = arith.constant 1 : index
    %c1_84 = arith.constant 1 : index
    %c64_85 = arith.constant 64 : index
    %c2_86 = arith.constant 2 : index
    %c112 = arith.constant 112 : index
    %c3_87 = arith.constant 3 : index
    %c112_88 = arith.constant 112 : index
    %310 = tensor.empty() : tensor<1x64x112x112xf32>
    // #map pins the input's leading index to 0; that dimension has extent 1.
    %311 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_82 : tensor<1x64x112x112xf32>) outs(%310 : tensor<1x64x112x112xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate, so it is also true when %in is NaN;
      // a NaN input is therefore passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x112x112xf32>
    // Same-type cast; it does not change the tensor type.
    %cast_89 = tensor.cast %311 : tensor<1x64x112x112xf32> to tensor<1x64x112x112xf32>
    // Quantize the 1x64x112x112 f32 tensor %cast_89 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 3.125e-02 (2^-5) and zero_point = 0 from the literals below.
    %312 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %313 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_90 = torch.constant.int 12
    %314 = torch.aten.item %312 : !torch.vtensor<[],f32> -> !torch.float
    %315 = torch_c.to_f64 %314
    %316 = torch.aten.item %313 : !torch.vtensor<[],si8> -> !torch.int
    %317 = torch_c.to_i64 %316
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_91 = arith.constant 1 : index
    %c1_92 = arith.constant 1 : index
    %c64_93 = arith.constant 64 : index
    %c2_94 = arith.constant 2 : index
    %c112_95 = arith.constant 112 : index
    %c3_96 = arith.constant 3 : index
    %c112_97 = arith.constant 112 : index
    %318 = tensor.empty() : tensor<1x64x112x112xi8>
    %319 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_89 : tensor<1x64x112x112xf32>) outs(%318 : tensor<1x64x112x112xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %316
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %314
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x112x112xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_98 = tensor.cast %319 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
    %cast_99 = tensor.cast %cast_98 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
    // Dequantize the i8 tensor %cast_99 back to f32:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.125e-02 (2^-5) and zero_point = 0 from the literals below.
    %320 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %321 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %322 = torch.aten.item %320 : !torch.vtensor<[],f32> -> !torch.float
    %323 = torch_c.to_f64 %322
    %324 = torch.aten.item %321 : !torch.vtensor<[],si8> -> !torch.int
    %325 = torch_c.to_i64 %324
    %cast_100 = tensor.cast %cast_99 : tensor<1x64x112x112xi8> to tensor<1x64x112x112xi8>
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_101 = arith.constant 1 : index
    %c1_102 = arith.constant 1 : index
    %c64_103 = arith.constant 64 : index
    %c2_104 = arith.constant 2 : index
    %c112_105 = arith.constant 112 : index
    %c3_106 = arith.constant 3 : index
    %c112_107 = arith.constant 112 : index
    %326 = tensor.empty() : tensor<1x64x112x112xf32>
    %327 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_100 : tensor<1x64x112x112xi8>) outs(%326 : tensor<1x64x112x112xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %324
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale arrives as a torch float (f64) and is narrowed to f32.
      %5779 = torch_c.to_f64 %322
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x112x112xf32>
    // Same-type cast. %cast_108 is the input of the max pooling below.
    %cast_108 = tensor.cast %327 : tensor<1x64x112x112xf32> to tensor<1x64x112x112xf32>
    // Max pooling of %cast_108 (1x64x112x112) with a 3x3 window, padding 1 on each
    // spatial side, and the strides/dilations given on the pooling op (2 and 1).
    // The result is cast to 1x64x56x56.
    //
    // Torch-level hyper-parameter lists. %328-%331 are not referenced in the
    // visible portion; only the two window-size constants are used below.
    %int3_109 = torch.constant.int 3
    %int3_110 = torch.constant.int 3
    %328 = torch.prim.ListConstruct %int3_109, %int3_110 : (!torch.int, !torch.int) -> !torch.list<int>
    %int1_111 = torch.constant.int 1
    %int1_112 = torch.constant.int 1
    %329 = torch.prim.ListConstruct %int1_111, %int1_112 : (!torch.int, !torch.int) -> !torch.list<int>
    %int2_113 = torch.constant.int 2
    %int2_114 = torch.constant.int 2
    %330 = torch.prim.ListConstruct %int2_113, %int2_114 : (!torch.int, !torch.int) -> !torch.list<int>
    %int1_115 = torch.constant.int 1
    %int1_116 = torch.constant.int 1
    %331 = torch.prim.ListConstruct %int1_115, %int1_116 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_117 = torch.constant.bool false
    // Window extents (3 and 3) as i64.
    %332 = torch_c.to_i64 %int3_109
    %333 = torch_c.to_i64 %int3_110
    // 0xFF800000 is the f32 bit pattern of -infinity. It serves both as the padding
    // value and as the initial value of the max reduction.
    %cst_118 = arith.constant 0xFF800000 : f32
    %padded_119 = tensor.pad %cast_108 low[0, 0, 1, 1] high[0, 0, 1, 1] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_118 : f32
    } : tensor<1x64x112x112xf32> to tensor<1x64x114x114xf32>
    // Constants emitted by the lowering; several are not referenced in the
    // visible portion.
    %c0_120 = arith.constant 0 : index
    %c1_121 = arith.constant 1 : index
    %c1_122 = arith.constant 1 : index
    %c64_123 = arith.constant 64 : index
    %c1_i64_124 = arith.constant 1 : i64
    %c1_i64_125 = arith.constant 1 : i64
    %c1_i64_126 = arith.constant 1 : i64
    %c1_i64_127 = arith.constant 1 : i64
    %c2_i64_128 = arith.constant 2 : i64
    %c2_i64_129 = arith.constant 2 : i64
    %c3_130 = arith.constant 3 : index
    %c112_131 = arith.constant 112 : index
    %c1_i64_132 = arith.constant 1 : i64
    %c2_i64_133 = arith.constant 2 : i64
    // Output extent derived from window size %333:
    //   floor((112 + 1*2 - 1*(3 - 1) - 1) / 2) + 1 = 56
    %334 = arith.muli %c1_i64_125, %c2_i64_133 : i64
    %335 = arith.index_cast %c112_131 : index to i64
    %336 = arith.addi %335, %334 : i64
    %337 = arith.subi %333, %c1_i64_132 : i64
    %338 = arith.muli %c1_i64_127, %337 : i64
    %339 = arith.subi %336, %338 : i64
    %340 = arith.subi %339, %c1_i64_132 : i64
    %341 = arith.floordivsi %340, %c2_i64_129 : i64
    %342 = arith.addi %341, %c1_i64_132 : i64
    %343 = arith.index_cast %342 : i64 to index
    %c2_134 = arith.constant 2 : index
    %c112_135 = arith.constant 112 : index
    %c1_i64_136 = arith.constant 1 : i64
    %c2_i64_137 = arith.constant 2 : i64
    // Same computation using window size %332.
    %344 = arith.muli %c1_i64_124, %c2_i64_137 : i64
    %345 = arith.index_cast %c112_135 : index to i64
    %346 = arith.addi %345, %344 : i64
    %347 = arith.subi %332, %c1_i64_136 : i64
    %348 = arith.muli %c1_i64_126, %347 : i64
    %349 = arith.subi %346, %348 : i64
    %350 = arith.subi %349, %c1_i64_136 : i64
    %351 = arith.floordivsi %350, %c2_i64_128 : i64
    %352 = arith.addi %351, %c1_i64_136 : i64
    %353 = arith.index_cast %352 : i64 to index
    // Output buffer, filled with -infinity so the max reduction starts from the
    // lowest possible value.
    %354 = tensor.empty(%353, %343) : tensor<1x64x?x?xf32>
    %355 = linalg.fill ins(%cst_118 : f32) outs(%354 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Second input of the pooling op: an uninitialised tensor whose two extents
    // are the window sizes %332 and %333.
    %356 = arith.index_cast %332 : i64 to index
    %357 = arith.index_cast %333 : i64 to index
    %358 = tensor.empty(%356, %357) : tensor<?x?xf32>
    %359 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_119, %358 : tensor<1x64x114x114xf32>, tensor<?x?xf32>) outs(%355 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Refine the dynamically shaped result to its static shape.
    %cast_138 = tensor.cast %359 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    // Quantize the 1x64x56x56 f32 tensor %cast_138 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 3.125e-02 (2^-5) and zero_point = 0 from the literals below.
    %360 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %361 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_139 = torch.constant.int 12
    %362 = torch.aten.item %360 : !torch.vtensor<[],f32> -> !torch.float
    %363 = torch_c.to_f64 %362
    %364 = torch.aten.item %361 : !torch.vtensor<[],si8> -> !torch.int
    %365 = torch_c.to_i64 %364
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_140 = arith.constant 1 : index
    %c1_141 = arith.constant 1 : index
    %c64_142 = arith.constant 64 : index
    %c2_143 = arith.constant 2 : index
    %c56 = arith.constant 56 : index
    %c3_144 = arith.constant 3 : index
    %c56_145 = arith.constant 56 : index
    %366 = tensor.empty() : tensor<1x64x56x56xi8>
    %367 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_138 : tensor<1x64x56x56xf32>) outs(%366 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %364
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %362
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_146 = tensor.cast %367 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_147 = tensor.cast %cast_146 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // Dequantize the i8 tensor %cast_147 back to f32:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.125e-02 (2^-5) and zero_point = 0 from the literals below.
    %368 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %369 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %370 = torch.aten.item %368 : !torch.vtensor<[],f32> -> !torch.float
    %371 = torch_c.to_f64 %370
    %372 = torch.aten.item %369 : !torch.vtensor<[],si8> -> !torch.int
    %373 = torch_c.to_i64 %372
    %cast_148 = tensor.cast %cast_147 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_149 = arith.constant 1 : index
    %c1_150 = arith.constant 1 : index
    %c64_151 = arith.constant 64 : index
    %c2_152 = arith.constant 2 : index
    %c56_153 = arith.constant 56 : index
    %c3_154 = arith.constant 3 : index
    %c56_155 = arith.constant 56 : index
    %374 = tensor.empty() : tensor<1x64x56x56xf32>
    %375 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_148 : tensor<1x64x56x56xi8>) outs(%374 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %372
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale arrives as a torch float (f64) and is narrowed to f32.
      %5779 = torch_c.to_f64 %370
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    // Same-type cast. %cast_156 is the input of the 1x1 convolution further down.
    %cast_156 = tensor.cast %375 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the 64x64x1x1 f32 tensor %6 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    %376 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %377 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_157 = torch.constant.int 12
    %378 = torch.aten.item %376 : !torch.vtensor<[],f32> -> !torch.float
    %379 = torch_c.to_f64 %378
    %380 = torch.aten.item %377 : !torch.vtensor<[],si8> -> !torch.int
    %381 = torch_c.to_i64 %380
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_158 = arith.constant 1 : index
    %c0_159 = arith.constant 0 : index
    %c64_160 = arith.constant 64 : index
    %c1_161 = arith.constant 1 : index
    %c64_162 = arith.constant 64 : index
    %382 = tensor.empty() : tensor<64x64x1x1xi8>
    // #map4 pins the two trailing input indices to 0; those dimensions have extent 1.
    %383 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6 : tensor<64x64x1x1xf32>) outs(%382 : tensor<64x64x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %380
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %378
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x64x1x1xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_163 = tensor.cast %383 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
    %cast_164 = tensor.cast %cast_163 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
    // Dequantize the i8 tensor %cast_164 back to f32:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    %384 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %385 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %386 = torch.aten.item %384 : !torch.vtensor<[],f32> -> !torch.float
    %387 = torch_c.to_f64 %386
    %388 = torch.aten.item %385 : !torch.vtensor<[],si8> -> !torch.int
    %389 = torch_c.to_i64 %388
    %cast_165 = tensor.cast %cast_164 : tensor<64x64x1x1xi8> to tensor<64x64x1x1xi8>
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_166 = arith.constant 1 : index
    %c0_167 = arith.constant 0 : index
    %c64_168 = arith.constant 64 : index
    %c1_169 = arith.constant 1 : index
    %c64_170 = arith.constant 64 : index
    %390 = tensor.empty() : tensor<64x64x1x1xf32>
    %391 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_165 : tensor<64x64x1x1xi8>) outs(%390 : tensor<64x64x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %388
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale arrives as a torch float (f64) and is narrowed to f32.
      %5779 = torch_c.to_f64 %386
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64x64x1x1xf32>
    // Same-type cast. %cast_171 is the filter operand of the 1x1 convolution below.
    %cast_171 = tensor.cast %391 : tensor<64x64x1x1xf32> to tensor<64x64x1x1xf32>
    // Quantize the 64-element f32 tensor %8 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0 from the literals below.
    %392 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %393 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_172 = torch.constant.int 12
    %394 = torch.aten.item %392 : !torch.vtensor<[],f32> -> !torch.float
    %395 = torch_c.to_f64 %394
    %396 = torch.aten.item %393 : !torch.vtensor<[],si8> -> !torch.int
    %397 = torch_c.to_i64 %396
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_173 = arith.constant 1 : index
    %c0_174 = arith.constant 0 : index
    %c64_175 = arith.constant 64 : index
    %398 = tensor.empty() : tensor<64xi8>
    %399 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%8 : tensor<64xf32>) outs(%398 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %396
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %394
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_176 = tensor.cast %399 : tensor<64xi8> to tensor<64xi8>
    %cast_177 = tensor.cast %cast_176 : tensor<64xi8> to tensor<64xi8>
    // Dequantize the i8 tensor %cast_177 back to f32:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0 from the literals below.
    %400 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %401 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %402 = torch.aten.item %400 : !torch.vtensor<[],f32> -> !torch.float
    %403 = torch_c.to_f64 %402
    %404 = torch.aten.item %401 : !torch.vtensor<[],si8> -> !torch.int
    %405 = torch_c.to_i64 %404
    %cast_178 = tensor.cast %cast_177 : tensor<64xi8> to tensor<64xi8>
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_179 = arith.constant 1 : index
    %c0_180 = arith.constant 0 : index
    %c64_181 = arith.constant 64 : index
    %406 = tensor.empty() : tensor<64xf32>
    %407 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_178 : tensor<64xi8>) outs(%406 : tensor<64xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 first so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %404
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale arrives as a torch float (f64) and is narrowed to f32.
      %5779 = torch_c.to_f64 %402
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64xf32>
    // Same-type cast. %cast_182 is broadcast along the channel dimension to
    // initialise the output of the convolution below.
    %cast_182 = tensor.cast %407 : tensor<64xf32> to tensor<64xf32>
    // 2-D convolution of %cast_156 (1x64x56x56) with the 64x64x1x1 filter
    // %cast_171, initialised with the per-channel values in %cast_182. The result
    // is cast to 1x64x56x56.
    //
    // Torch-level hyper-parameters. Judging from their uses below, 0 is the
    // spatial padding, while the 1s match the dilations and strides attributes of
    // the conv op. The list values %408-%411 are not referenced in the visible
    // portion.
    %int0_183 = torch.constant.int 0
    %int0_184 = torch.constant.int 0
    %int1_185 = torch.constant.int 1
    %int1_186 = torch.constant.int 1
    %int1_187 = torch.constant.int 1
    %int1_188 = torch.constant.int 1
    %int0_189 = torch.constant.int 0
    %408 = torch.prim.ListConstruct %int0_183, %int0_184 : (!torch.int, !torch.int) -> !torch.list<int>
    %409 = torch.prim.ListConstruct %int1_185, %int1_186 : (!torch.int, !torch.int) -> !torch.list<int>
    %410 = torch.prim.ListConstruct %int1_187, %int1_188 : (!torch.int, !torch.int) -> !torch.list<int>
    %411 = torch.prim.ListConstruct %int0_189, %int0_189 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_190 = torch.constant.bool false
    // %412 is the group count (1); it feeds the divisibility asserts below.
    %int1_191 = torch.constant.int 1
    %412 = torch_c.to_i64 %int1_191
    // %413 / %414 are the padding amounts for the two spatial dimensions.
    %413 = torch_c.to_i64 %int0_183
    %414 = torch_c.to_i64 %int0_184
    %415 = torch_c.to_i64 %int0_189
    %416 = torch_c.to_i64 %int0_189
    // Index constants emitted by the lowering. Only some of them (%c64_195,
    // %c56_197, %c56_199, %c64_201, %c1_205, %c1_207) are used below.
    %c0_192 = arith.constant 0 : index
    %c1_193 = arith.constant 1 : index
    %c1_194 = arith.constant 1 : index
    %c64_195 = arith.constant 64 : index
    %c2_196 = arith.constant 2 : index
    %c56_197 = arith.constant 56 : index
    %c3_198 = arith.constant 3 : index
    %c56_199 = arith.constant 56 : index
    %c0_200 = arith.constant 0 : index
    %c64_201 = arith.constant 64 : index
    %c1_202 = arith.constant 1 : index
    %c64_203 = arith.constant 64 : index
    %c2_204 = arith.constant 2 : index
    %c1_205 = arith.constant 1 : index
    %c3_206 = arith.constant 3 : index
    %c1_207 = arith.constant 1 : index
    // Runtime checks: 64 (input channels) and 64 (filter count) must both be
    // divisible by the group count.
    %417 = arith.index_cast %412 : i64 to index
    %c0_208 = arith.constant 0 : index
    %418 = arith.remsi %c64_195, %417 : index
    %419 = arith.cmpi eq, %c0_208, %418 : index
    cf.assert %419, "invalid: groups must divide input channel size evenly."
    %c0_209 = arith.constant 0 : index
    %420 = arith.remsi %c64_201, %417 : index
    %421 = arith.cmpi eq, %c0_209, %420 : index
    cf.assert %421, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: %c1_i64_210 and
    // %c1_i64_211 multiply (kernel - 1), %c1_i64_212 and %c1_i64_213 are the
    // divisors.
    %c1_i64_210 = arith.constant 1 : i64
    %c1_i64_211 = arith.constant 1 : i64
    %c1_i64_212 = arith.constant 1 : i64
    %c1_i64_213 = arith.constant 1 : i64
    // Value written into the padded border.
    %cst_214 = arith.constant 0.000000e+00 : f32
    %c0_215 = arith.constant 0 : index
    %c1_216 = arith.constant 1 : index
    %c1_217 = arith.constant 1 : index
    %c64_218 = arith.constant 64 : index
    %c2_219 = arith.constant 2 : index
    %c56_220 = arith.constant 56 : index
    %c3_221 = arith.constant 3 : index
    %c56_222 = arith.constant 56 : index
    %c0_i64_223 = arith.constant 0 : i64
    // Pad op with all pad amounts equal to 0 (from %c0_i64_223, %413 and %414).
    // The result type is fully dynamic.
    %422 = arith.index_cast %c0_i64_223 : i64 to index
    %423 = arith.index_cast %c0_i64_223 : i64 to index
    %424 = arith.index_cast %413 : i64 to index
    %425 = arith.index_cast %414 : i64 to index
    %padded_224 = tensor.pad %cast_156 low[%422, %423, %424, %425] high[%422, %423, %424, %425] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_214 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent of the first spatial dimension:
    //   floor((56 + 2*0 - 1*(1 - 1) - 1) / 1) + 1 = 56
    %426 = arith.index_cast %c1_205 : index to i64
    %c1_i64_225 = arith.constant 1 : i64
    %c2_i64_226 = arith.constant 2 : i64
    %427 = arith.muli %413, %c2_i64_226 : i64
    %428 = arith.index_cast %c56_197 : index to i64
    %429 = arith.addi %428, %427 : i64
    %430 = arith.subi %426, %c1_i64_225 : i64
    %431 = arith.muli %c1_i64_210, %430 : i64
    %432 = arith.subi %429, %431 : i64
    %433 = arith.subi %432, %c1_i64_225 : i64
    %434 = arith.floordivsi %433, %c1_i64_212 : i64
    %435 = arith.addi %434, %c1_i64_225 : i64
    %436 = arith.index_cast %435 : i64 to index
    // Same computation for the second spatial dimension.
    %437 = arith.index_cast %c1_207 : index to i64
    %c1_i64_227 = arith.constant 1 : i64
    %c2_i64_228 = arith.constant 2 : i64
    %438 = arith.muli %414, %c2_i64_228 : i64
    %439 = arith.index_cast %c56_199 : index to i64
    %440 = arith.addi %439, %438 : i64
    %441 = arith.subi %437, %c1_i64_227 : i64
    %442 = arith.muli %c1_i64_211, %441 : i64
    %443 = arith.subi %440, %442 : i64
    %444 = arith.subi %443, %c1_i64_227 : i64
    %445 = arith.floordivsi %444, %c1_i64_213 : i64
    %446 = arith.addi %445, %c1_i64_227 : i64
    %447 = arith.index_cast %446 : i64 to index
    // Broadcast the 64 per-channel values of %cast_182 along dimension 1 (#map3)
    // into the output-shaped tensor that the convolution uses as its outs operand.
    %448 = tensor.empty(%436, %447) : tensor<1x64x?x?xf32>
    %449 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_182 : tensor<64xf32>) outs(%448 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // Per-group channel counts; not referenced in the visible portion.
    %450 = arith.floordivsi %c64_195, %417 : index
    %451 = arith.floordivsi %c64_201, %417 : index
    %c0_229 = arith.constant 0 : index
    %c1_230 = arith.constant 1 : index
    %452 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_224, %cast_171 : tensor<?x?x?x?xf32>, tensor<64x64x1x1xf32>) outs(%449 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Refine the dynamically shaped result to its static shape.
    %cast_231 = tensor.cast %452 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    // Element-wise ReLU over %cast_231: out = in > 0 ? in : 0.
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_232 = arith.constant 1 : index
    %c1_233 = arith.constant 1 : index
    %c64_234 = arith.constant 64 : index
    %c2_235 = arith.constant 2 : index
    %c56_236 = arith.constant 56 : index
    %c3_237 = arith.constant 3 : index
    %c56_238 = arith.constant 56 : index
    %453 = tensor.empty() : tensor<1x64x56x56xf32>
    // #map pins the input's leading index to 0; that dimension has extent 1.
    %454 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_231 : tensor<1x64x56x56xf32>) outs(%453 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate, so it is also true when %in is NaN;
      // a NaN input is therefore passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x56x56xf32>
    // Same-type cast; it does not change the tensor type.
    %cast_239 = tensor.cast %454 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the 1x64x56x56 f32 tensor %cast_239 to i8:
    //   out = clamp(roundeven(in / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0 from the literals below.
    %455 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %456 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code for qint8; the value is
    // not referenced in the visible portion of the function.
    %int12_240 = torch.constant.int 12
    %457 = torch.aten.item %455 : !torch.vtensor<[],f32> -> !torch.float
    %458 = torch_c.to_f64 %457
    %459 = torch.aten.item %456 : !torch.vtensor<[],si8> -> !torch.int
    %460 = torch_c.to_i64 %459
    // Index constants emitted by the lowering; not referenced in the visible portion.
    %c1_241 = arith.constant 1 : index
    %c1_242 = arith.constant 1 : index
    %c64_243 = arith.constant 64 : index
    %c2_244 = arith.constant 2 : index
    %c56_245 = arith.constant 56 : index
    %c3_246 = arith.constant 3 : index
    %c56_247 = arith.constant 56 : index
    %461 = tensor.empty() : tensor<1x64x56x56xi8>
    %462 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_239 : tensor<1x64x56x56xf32>) outs(%461 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale, both converted to f32 for the arithmetic below.
      %5774 = torch_c.to_i64 %459
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %457
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, as ONNX QuantizeLinear and torch quantize_per_tensor
      // specify; math.round would break ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_248 = tensor.cast %462 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_249 = tensor.cast %cast_248 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %463 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %464 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %465 = torch.aten.item %463 : !torch.vtensor<[],f32> -> !torch.float
    %466 = torch_c.to_f64 %465
    %467 = torch.aten.item %464 : !torch.vtensor<[],si8> -> !torch.int
    %468 = torch_c.to_i64 %467
    %cast_250 = tensor.cast %cast_249 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %c1_251 = arith.constant 1 : index
    %c1_252 = arith.constant 1 : index
    %c64_253 = arith.constant 64 : index
    %c2_254 = arith.constant 2 : index
    %c56_255 = arith.constant 56 : index
    %c3_256 = arith.constant 3 : index
    %c56_257 = arith.constant 56 : index
    %469 = tensor.empty() : tensor<1x64x56x56xf32>
    // Dequantizes %cast_250 elementwise from i8 back to f32: (in - zero_point) * scale.
    // The zero point is %467 (item of the si8 literal 0) and the scale is %465 (item of
    // the f32 literal 1.562500e-02).
    %470 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_250 : tensor<1x64x56x56xi8>) outs(%469 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      // Zero point narrowed from i64 to i32.
      %5775 = torch_c.to_i64 %467
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %465
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    %cast_258 = tensor.cast %470 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    %471 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %472 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_259 = torch.constant.int 12
    %473 = torch.aten.item %471 : !torch.vtensor<[],f32> -> !torch.float
    %474 = torch_c.to_f64 %473
    %475 = torch.aten.item %472 : !torch.vtensor<[],si8> -> !torch.int
    %476 = torch_c.to_i64 %475
    %c1_260 = arith.constant 1 : index
    %c0_261 = arith.constant 0 : index
    %c64_262 = arith.constant 64 : index
    %c1_263 = arith.constant 1 : index
    %c64_264 = arith.constant 64 : index
    %c2_265 = arith.constant 2 : index
    %c3_266 = arith.constant 3 : index
    %c3_267 = arith.constant 3 : index
    %c3_268 = arith.constant 3 : index
    %477 = tensor.empty() : tensor<64x64x3x3xi8>
    // Quantizes the 64x64x3x3 f32 tensor %10 elementwise to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // Scale is %473 (item of the f32 literal 3.906250e-03); zero point is %475 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %478 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10 : tensor<64x64x3x3xf32>) outs(%477 : tensor<64x64x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %475
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %473
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x64x3x3xi8>
    %cast_269 = tensor.cast %478 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %cast_270 = tensor.cast %cast_269 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %479 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %480 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %481 = torch.aten.item %479 : !torch.vtensor<[],f32> -> !torch.float
    %482 = torch_c.to_f64 %481
    %483 = torch.aten.item %480 : !torch.vtensor<[],si8> -> !torch.int
    %484 = torch_c.to_i64 %483
    %cast_271 = tensor.cast %cast_270 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %c1_272 = arith.constant 1 : index
    %c0_273 = arith.constant 0 : index
    %c64_274 = arith.constant 64 : index
    %c1_275 = arith.constant 1 : index
    %c64_276 = arith.constant 64 : index
    %c2_277 = arith.constant 2 : index
    %c3_278 = arith.constant 3 : index
    %c3_279 = arith.constant 3 : index
    %c3_280 = arith.constant 3 : index
    %485 = tensor.empty() : tensor<64x64x3x3xf32>
    // Dequantizes the 64x64x3x3 i8 tensor %cast_271 elementwise to f32:
    // (in - zero_point) * scale, with zero point %483 (si8 literal 0) and scale %481
    // (f32 literal 3.906250e-03).
    %486 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_271 : tensor<64x64x3x3xi8>) outs(%485 : tensor<64x64x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %483
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %481
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64x64x3x3xf32>
    %cast_281 = tensor.cast %486 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
    %487 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %488 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_282 = torch.constant.int 12
    %489 = torch.aten.item %487 : !torch.vtensor<[],f32> -> !torch.float
    %490 = torch_c.to_f64 %489
    %491 = torch.aten.item %488 : !torch.vtensor<[],si8> -> !torch.int
    %492 = torch_c.to_i64 %491
    %c1_283 = arith.constant 1 : index
    %c0_284 = arith.constant 0 : index
    %c64_285 = arith.constant 64 : index
    %493 = tensor.empty() : tensor<64xi8>
    // Quantizes the 64-element f32 tensor %12 (defined before this excerpt) to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // Scale is %489 (item of the f32 literal 3.125000e-02); zero point is %491 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %494 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%12 : tensor<64xf32>) outs(%493 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %491
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %489
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    %cast_286 = tensor.cast %494 : tensor<64xi8> to tensor<64xi8>
    %cast_287 = tensor.cast %cast_286 : tensor<64xi8> to tensor<64xi8>
    %495 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %496 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %497 = torch.aten.item %495 : !torch.vtensor<[],f32> -> !torch.float
    %498 = torch_c.to_f64 %497
    %499 = torch.aten.item %496 : !torch.vtensor<[],si8> -> !torch.int
    %500 = torch_c.to_i64 %499
    %cast_288 = tensor.cast %cast_287 : tensor<64xi8> to tensor<64xi8>
    %c1_289 = arith.constant 1 : index
    %c0_290 = arith.constant 0 : index
    %c64_291 = arith.constant 64 : index
    %501 = tensor.empty() : tensor<64xf32>
    // Dequantizes the 64-element i8 tensor %cast_288 to f32: (in - zero_point) * scale,
    // with zero point %499 (si8 literal 0) and scale %497 (f32 literal 3.125000e-02).
    %502 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_288 : tensor<64xi8>) outs(%501 : tensor<64xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %499
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %497
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64xf32>
    %cast_292 = tensor.cast %502 : tensor<64xf32> to tensor<64xf32>
    %int1_293 = torch.constant.int 1
    %int1_294 = torch.constant.int 1
    %int1_295 = torch.constant.int 1
    %int1_296 = torch.constant.int 1
    %int1_297 = torch.constant.int 1
    %int1_298 = torch.constant.int 1
    %int0_299 = torch.constant.int 0
    %503 = torch.prim.ListConstruct %int1_293, %int1_294 : (!torch.int, !torch.int) -> !torch.list<int>
    %504 = torch.prim.ListConstruct %int1_295, %int1_296 : (!torch.int, !torch.int) -> !torch.list<int>
    %505 = torch.prim.ListConstruct %int1_297, %int1_298 : (!torch.int, !torch.int) -> !torch.list<int>
    %506 = torch.prim.ListConstruct %int0_299, %int0_299 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_300 = torch.constant.bool false
    %int1_301 = torch.constant.int 1
    %507 = torch_c.to_i64 %int1_301
    %508 = torch_c.to_i64 %int1_293
    %509 = torch_c.to_i64 %int1_294
    %510 = torch_c.to_i64 %int0_299
    %511 = torch_c.to_i64 %int0_299
    %c0_302 = arith.constant 0 : index
    %c1_303 = arith.constant 1 : index
    %c1_304 = arith.constant 1 : index
    %c64_305 = arith.constant 64 : index
    %c2_306 = arith.constant 2 : index
    %c56_307 = arith.constant 56 : index
    %c3_308 = arith.constant 3 : index
    %c56_309 = arith.constant 56 : index
    %c0_310 = arith.constant 0 : index
    %c64_311 = arith.constant 64 : index
    %c1_312 = arith.constant 1 : index
    %c64_313 = arith.constant 64 : index
    %c2_314 = arith.constant 2 : index
    %c3_315 = arith.constant 3 : index
    %c3_316 = arith.constant 3 : index
    %c3_317 = arith.constant 3 : index
    // Runtime sanity checks on the convolution group count. %507 is converted from the
    // torch constant 1 (%int1_301); both checked sizes are the index constant 64.
    %512 = arith.index_cast %507 : i64 to index
    %c0_318 = arith.constant 0 : index
    // 64 rem groups must be 0 (input channel size, per the assert message).
    %513 = arith.remsi %c64_305, %512 : index
    %514 = arith.cmpi eq, %c0_318, %513 : index
    cf.assert %514, "invalid: groups must divide input channel size evenly."
    %c0_319 = arith.constant 0 : index
    // 64 rem groups must be 0 (weight batch size, per the assert message).
    %515 = arith.remsi %c64_311, %512 : index
    %516 = arith.cmpi eq, %c0_319, %515 : index
    cf.assert %516, "invalid: groups must divide weight batch size evenly."
    %c1_i64_320 = arith.constant 1 : i64
    %c1_i64_321 = arith.constant 1 : i64
    %c1_i64_322 = arith.constant 1 : i64
    %c1_i64_323 = arith.constant 1 : i64
    %cst_324 = arith.constant 0.000000e+00 : f32
    %c0_325 = arith.constant 0 : index
    %c1_326 = arith.constant 1 : index
    %c1_327 = arith.constant 1 : index
    %c64_328 = arith.constant 64 : index
    %c2_329 = arith.constant 2 : index
    %c56_330 = arith.constant 56 : index
    %c3_331 = arith.constant 3 : index
    %c56_332 = arith.constant 56 : index
    // Convolution of the dequantized activation %cast_258 with the dequantized 64x64x3x3
    // weight %cast_281: zero-pad the input, compute the two dynamic output extents, fill
    // the output with the per-channel bias %cast_292, then run linalg.conv_2d_nchw_fchw.
    %c0_i64_333 = arith.constant 0 : i64
    // Pad amounts: 0 on dims 0 and 1; %508 / %509 (both converted from the torch
    // constant 1) on dims 2 and 3. The same amounts are used for low and high.
    %517 = arith.index_cast %c0_i64_333 : i64 to index
    %518 = arith.index_cast %c0_i64_333 : i64 to index
    %519 = arith.index_cast %508 : i64 to index
    %520 = arith.index_cast %509 : i64 to index
    %padded_334 = tensor.pad %cast_258 low[%517, %518, %519, %520] high[%517, %518, %519, %520] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      // Padding value is %cst_324 (0.0).
      tensor.yield %cst_324 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((56 + 2 * %508 - %c1_i64_320 * (3 - 1) - 1) floordiv %c1_i64_322) + 1
    // This has the shape of the usual conv output-size formula, with %c1_i64_320 in the
    // dilation position and %c1_i64_322 in the stride position (both are constant 1).
    %521 = arith.index_cast %c3_315 : index to i64
    %c1_i64_335 = arith.constant 1 : i64
    %c2_i64_336 = arith.constant 2 : i64
    %522 = arith.muli %508, %c2_i64_336 : i64
    %523 = arith.index_cast %c56_307 : index to i64
    %524 = arith.addi %523, %522 : i64
    %525 = arith.subi %521, %c1_i64_335 : i64
    %526 = arith.muli %c1_i64_320, %525 : i64
    %527 = arith.subi %524, %526 : i64
    %528 = arith.subi %527, %c1_i64_335 : i64
    %529 = arith.floordivsi %528, %c1_i64_322 : i64
    %530 = arith.addi %529, %c1_i64_335 : i64
    %531 = arith.index_cast %530 : i64 to index
    // Same computation for dim 3, using %509, %c1_i64_321 and %c1_i64_323.
    %532 = arith.index_cast %c3_317 : index to i64
    %c1_i64_337 = arith.constant 1 : i64
    %c2_i64_338 = arith.constant 2 : i64
    %533 = arith.muli %509, %c2_i64_338 : i64
    %534 = arith.index_cast %c56_309 : index to i64
    %535 = arith.addi %534, %533 : i64
    %536 = arith.subi %532, %c1_i64_337 : i64
    %537 = arith.muli %c1_i64_321, %536 : i64
    %538 = arith.subi %535, %537 : i64
    %539 = arith.subi %538, %c1_i64_337 : i64
    %540 = arith.floordivsi %539, %c1_i64_323 : i64
    %541 = arith.addi %540, %c1_i64_337 : i64
    %542 = arith.index_cast %541 : i64 to index
    // Output tensor with the two computed dynamic extents, filled by broadcasting the
    // 64-element bias along d1 (#map3) to every output position.
    %543 = tensor.empty(%531, %542) : tensor<1x64x?x?xf32>
    %544 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_292 : tensor<64xf32>) outs(%543 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // Per-group sizes and two index constants; none of these four values is referenced
    // again within this excerpt.
    %545 = arith.floordivsi %c64_305, %512 : index
    %546 = arith.floordivsi %c64_311, %512 : index
    %c0_339 = arith.constant 0 : index
    %c1_340 = arith.constant 1 : index
    // The bias-filled tensor %544 is passed as the outs operand of the convolution.
    %547 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_334, %cast_281 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%544 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Cast the dynamically shaped result to the static 1x64x56x56 shape.
    %cast_341 = tensor.cast %547 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    %c1_342 = arith.constant 1 : index
    %c1_343 = arith.constant 1 : index
    %c64_344 = arith.constant 64 : index
    %c2_345 = arith.constant 2 : index
    %c56_346 = arith.constant 56 : index
    %c3_347 = arith.constant 3 : index
    %c56_348 = arith.constant 56 : index
    %548 = tensor.empty() : tensor<1x64x56x56xf32>
    // Elementwise ReLU on %cast_341: yields the input where it compares greater than
    // 0.0, otherwise 0.0.
    %549 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_341 : tensor<1x64x56x56xf32>) outs(%548 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so it is also true for NaN and a NaN input is
      // passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x56x56xf32>
    %cast_349 = tensor.cast %549 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    %550 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %551 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_350 = torch.constant.int 12
    %552 = torch.aten.item %550 : !torch.vtensor<[],f32> -> !torch.float
    %553 = torch_c.to_f64 %552
    %554 = torch.aten.item %551 : !torch.vtensor<[],si8> -> !torch.int
    %555 = torch_c.to_i64 %554
    %c1_351 = arith.constant 1 : index
    %c1_352 = arith.constant 1 : index
    %c64_353 = arith.constant 64 : index
    %c2_354 = arith.constant 2 : index
    %c56_355 = arith.constant 56 : index
    %c3_356 = arith.constant 3 : index
    %c56_357 = arith.constant 56 : index
    %556 = tensor.empty() : tensor<1x64x56x56xi8>
    // Quantizes the ReLU output %cast_349 elementwise from f32 to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // Scale is %552 (item of the f32 literal 1.562500e-02); zero point is %554 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %557 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_349 : tensor<1x64x56x56xf32>) outs(%556 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %554
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %552
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    %cast_358 = tensor.cast %557 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_359 = tensor.cast %cast_358 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %558 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %559 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %560 = torch.aten.item %558 : !torch.vtensor<[],f32> -> !torch.float
    %561 = torch_c.to_f64 %560
    %562 = torch.aten.item %559 : !torch.vtensor<[],si8> -> !torch.int
    %563 = torch_c.to_i64 %562
    %cast_360 = tensor.cast %cast_359 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %c1_361 = arith.constant 1 : index
    %c1_362 = arith.constant 1 : index
    %c64_363 = arith.constant 64 : index
    %c2_364 = arith.constant 2 : index
    %c56_365 = arith.constant 56 : index
    %c3_366 = arith.constant 3 : index
    %c56_367 = arith.constant 56 : index
    %564 = tensor.empty() : tensor<1x64x56x56xf32>
    // Dequantizes %cast_360 elementwise from i8 to f32: (in - zero_point) * scale, with
    // zero point %562 (si8 literal 0) and scale %560 (f32 literal 1.562500e-02).
    %565 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_360 : tensor<1x64x56x56xi8>) outs(%564 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %562
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %560
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    %cast_368 = tensor.cast %565 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    %566 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %567 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_369 = torch.constant.int 12
    %568 = torch.aten.item %566 : !torch.vtensor<[],f32> -> !torch.float
    %569 = torch_c.to_f64 %568
    %570 = torch.aten.item %567 : !torch.vtensor<[],si8> -> !torch.int
    %571 = torch_c.to_i64 %570
    %c1_370 = arith.constant 1 : index
    %c0_371 = arith.constant 0 : index
    %c256 = arith.constant 256 : index
    %c1_372 = arith.constant 1 : index
    %c64_373 = arith.constant 64 : index
    %572 = tensor.empty() : tensor<256x64x1x1xi8>
    // Quantizes the 256x64x1x1 f32 tensor %14 (defined before this excerpt) to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // #map4 reads the input with its two trailing (size-1) dimensions pinned to index 0.
    // Scale is %568 (item of the f32 literal 3.906250e-03); zero point is %570 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %573 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14 : tensor<256x64x1x1xf32>) outs(%572 : tensor<256x64x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %570
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %568
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x64x1x1xi8>
    %cast_374 = tensor.cast %573 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %cast_375 = tensor.cast %cast_374 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %574 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %575 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %576 = torch.aten.item %574 : !torch.vtensor<[],f32> -> !torch.float
    %577 = torch_c.to_f64 %576
    %578 = torch.aten.item %575 : !torch.vtensor<[],si8> -> !torch.int
    %579 = torch_c.to_i64 %578
    %cast_376 = tensor.cast %cast_375 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %c1_377 = arith.constant 1 : index
    %c0_378 = arith.constant 0 : index
    %c256_379 = arith.constant 256 : index
    %c1_380 = arith.constant 1 : index
    %c64_381 = arith.constant 64 : index
    %580 = tensor.empty() : tensor<256x64x1x1xf32>
    // Dequantizes the 256x64x1x1 i8 tensor %cast_376 to f32: (in - zero_point) * scale,
    // with zero point %578 (si8 literal 0) and scale %576 (f32 literal 3.906250e-03).
    %581 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_376 : tensor<256x64x1x1xi8>) outs(%580 : tensor<256x64x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %578
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %576
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x64x1x1xf32>
    %cast_382 = tensor.cast %581 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
    %582 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %583 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_383 = torch.constant.int 12
    %584 = torch.aten.item %582 : !torch.vtensor<[],f32> -> !torch.float
    %585 = torch_c.to_f64 %584
    %586 = torch.aten.item %583 : !torch.vtensor<[],si8> -> !torch.int
    %587 = torch_c.to_i64 %586
    %c1_384 = arith.constant 1 : index
    %c0_385 = arith.constant 0 : index
    %c256_386 = arith.constant 256 : index
    %588 = tensor.empty() : tensor<256xi8>
    // Quantizes the 256-element f32 tensor %16 (defined before this excerpt) to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // Scale is %584 (item of the f32 literal 7.812500e-03); zero point is %586 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %589 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%16 : tensor<256xf32>) outs(%588 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %586
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %584
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    %cast_387 = tensor.cast %589 : tensor<256xi8> to tensor<256xi8>
    %cast_388 = tensor.cast %cast_387 : tensor<256xi8> to tensor<256xi8>
    %590 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %591 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %592 = torch.aten.item %590 : !torch.vtensor<[],f32> -> !torch.float
    %593 = torch_c.to_f64 %592
    %594 = torch.aten.item %591 : !torch.vtensor<[],si8> -> !torch.int
    %595 = torch_c.to_i64 %594
    %cast_389 = tensor.cast %cast_388 : tensor<256xi8> to tensor<256xi8>
    %c1_390 = arith.constant 1 : index
    %c0_391 = arith.constant 0 : index
    %c256_392 = arith.constant 256 : index
    %596 = tensor.empty() : tensor<256xf32>
    // Dequantizes the 256-element i8 tensor %cast_389 to f32: (in - zero_point) * scale,
    // with zero point %594 (si8 literal 0) and scale %592 (f32 literal 7.812500e-03).
    %597 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_389 : tensor<256xi8>) outs(%596 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %594
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %592
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_393 = tensor.cast %597 : tensor<256xf32> to tensor<256xf32>
    %int0_394 = torch.constant.int 0
    %int0_395 = torch.constant.int 0
    %int1_396 = torch.constant.int 1
    %int1_397 = torch.constant.int 1
    %int1_398 = torch.constant.int 1
    %int1_399 = torch.constant.int 1
    %int0_400 = torch.constant.int 0
    %598 = torch.prim.ListConstruct %int0_394, %int0_395 : (!torch.int, !torch.int) -> !torch.list<int>
    %599 = torch.prim.ListConstruct %int1_396, %int1_397 : (!torch.int, !torch.int) -> !torch.list<int>
    %600 = torch.prim.ListConstruct %int1_398, %int1_399 : (!torch.int, !torch.int) -> !torch.list<int>
    %601 = torch.prim.ListConstruct %int0_400, %int0_400 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_401 = torch.constant.bool false
    %int1_402 = torch.constant.int 1
    %602 = torch_c.to_i64 %int1_402
    %603 = torch_c.to_i64 %int0_394
    %604 = torch_c.to_i64 %int0_395
    %605 = torch_c.to_i64 %int0_400
    %606 = torch_c.to_i64 %int0_400
    %c0_403 = arith.constant 0 : index
    %c1_404 = arith.constant 1 : index
    %c1_405 = arith.constant 1 : index
    %c64_406 = arith.constant 64 : index
    %c2_407 = arith.constant 2 : index
    %c56_408 = arith.constant 56 : index
    %c3_409 = arith.constant 3 : index
    %c56_410 = arith.constant 56 : index
    %c0_411 = arith.constant 0 : index
    %c256_412 = arith.constant 256 : index
    %c1_413 = arith.constant 1 : index
    %c64_414 = arith.constant 64 : index
    %c2_415 = arith.constant 2 : index
    %c1_416 = arith.constant 1 : index
    %c3_417 = arith.constant 3 : index
    %c1_418 = arith.constant 1 : index
    // Runtime sanity checks on the convolution group count. %602 is converted from the
    // torch constant 1 (%int1_402); the checked sizes are the index constants 64 and 256.
    %607 = arith.index_cast %602 : i64 to index
    %c0_419 = arith.constant 0 : index
    // 64 rem groups must be 0 (input channel size, per the assert message).
    %608 = arith.remsi %c64_406, %607 : index
    %609 = arith.cmpi eq, %c0_419, %608 : index
    cf.assert %609, "invalid: groups must divide input channel size evenly."
    %c0_420 = arith.constant 0 : index
    // 256 rem groups must be 0 (weight batch size, per the assert message).
    %610 = arith.remsi %c256_412, %607 : index
    %611 = arith.cmpi eq, %c0_420, %610 : index
    cf.assert %611, "invalid: groups must divide weight batch size evenly."
    %c1_i64_421 = arith.constant 1 : i64
    %c1_i64_422 = arith.constant 1 : i64
    %c1_i64_423 = arith.constant 1 : i64
    %c1_i64_424 = arith.constant 1 : i64
    %cst_425 = arith.constant 0.000000e+00 : f32
    %c0_426 = arith.constant 0 : index
    %c1_427 = arith.constant 1 : index
    %c1_428 = arith.constant 1 : index
    %c64_429 = arith.constant 64 : index
    %c2_430 = arith.constant 2 : index
    %c56_431 = arith.constant 56 : index
    %c3_432 = arith.constant 3 : index
    %c56_433 = arith.constant 56 : index
    // Convolution of the dequantized activation %cast_368 with the dequantized 256x64x1x1
    // weight %cast_382: pad the input (all pad amounts are 0 here), compute the two
    // dynamic output extents, fill the output with the per-channel bias %cast_393, then
    // run linalg.conv_2d_nchw_fchw.
    %c0_i64_434 = arith.constant 0 : i64
    // Pad amounts: 0 on dims 0 and 1; %603 / %604 (both converted from the torch
    // constant 0) on dims 2 and 3. The same amounts are used for low and high.
    %612 = arith.index_cast %c0_i64_434 : i64 to index
    %613 = arith.index_cast %c0_i64_434 : i64 to index
    %614 = arith.index_cast %603 : i64 to index
    %615 = arith.index_cast %604 : i64 to index
    %padded_435 = tensor.pad %cast_368 low[%612, %613, %614, %615] high[%612, %613, %614, %615] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      // Padding value is %cst_425 (0.0).
      tensor.yield %cst_425 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((56 + 2 * %603 - %c1_i64_421 * (1 - 1) - 1) floordiv %c1_i64_423) + 1
    // This has the shape of the usual conv output-size formula, with %c1_i64_421 in the
    // dilation position and %c1_i64_423 in the stride position (both are constant 1).
    %616 = arith.index_cast %c1_416 : index to i64
    %c1_i64_436 = arith.constant 1 : i64
    %c2_i64_437 = arith.constant 2 : i64
    %617 = arith.muli %603, %c2_i64_437 : i64
    %618 = arith.index_cast %c56_408 : index to i64
    %619 = arith.addi %618, %617 : i64
    %620 = arith.subi %616, %c1_i64_436 : i64
    %621 = arith.muli %c1_i64_421, %620 : i64
    %622 = arith.subi %619, %621 : i64
    %623 = arith.subi %622, %c1_i64_436 : i64
    %624 = arith.floordivsi %623, %c1_i64_423 : i64
    %625 = arith.addi %624, %c1_i64_436 : i64
    %626 = arith.index_cast %625 : i64 to index
    // Same computation for dim 3, using %604, %c1_i64_422 and %c1_i64_424.
    %627 = arith.index_cast %c1_418 : index to i64
    %c1_i64_438 = arith.constant 1 : i64
    %c2_i64_439 = arith.constant 2 : i64
    %628 = arith.muli %604, %c2_i64_439 : i64
    %629 = arith.index_cast %c56_410 : index to i64
    %630 = arith.addi %629, %628 : i64
    %631 = arith.subi %627, %c1_i64_438 : i64
    %632 = arith.muli %c1_i64_422, %631 : i64
    %633 = arith.subi %630, %632 : i64
    %634 = arith.subi %633, %c1_i64_438 : i64
    %635 = arith.floordivsi %634, %c1_i64_424 : i64
    %636 = arith.addi %635, %c1_i64_438 : i64
    %637 = arith.index_cast %636 : i64 to index
    // Output tensor with the two computed dynamic extents, filled by broadcasting the
    // 256-element bias along d1 (#map3) to every output position.
    %638 = tensor.empty(%626, %637) : tensor<1x256x?x?xf32>
    %639 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_393 : tensor<256xf32>) outs(%638 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // Per-group sizes and two index constants; none of these four values is referenced
    // again within this excerpt.
    %640 = arith.floordivsi %c64_406, %607 : index
    %641 = arith.floordivsi %c256_412, %607 : index
    %c0_440 = arith.constant 0 : index
    %c1_441 = arith.constant 1 : index
    // The bias-filled tensor %639 is passed as the outs operand of the convolution.
    %642 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_435, %cast_382 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%639 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Cast the dynamically shaped result to the static 1x256x56x56 shape.
    %cast_442 = tensor.cast %642 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
    %643 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %644 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_443 = torch.constant.int 12
    %645 = torch.aten.item %643 : !torch.vtensor<[],f32> -> !torch.float
    %646 = torch_c.to_f64 %645
    %647 = torch.aten.item %644 : !torch.vtensor<[],si8> -> !torch.int
    %648 = torch_c.to_i64 %647
    %c1_444 = arith.constant 1 : index
    %c1_445 = arith.constant 1 : index
    %c256_446 = arith.constant 256 : index
    %c2_447 = arith.constant 2 : index
    %c56_448 = arith.constant 56 : index
    %c3_449 = arith.constant 3 : index
    %c56_450 = arith.constant 56 : index
    %649 = tensor.empty() : tensor<1x256x56x56xi8>
    // Quantizes the convolution result %cast_442 elementwise from f32 to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // Scale is %645 (item of the f32 literal 7.812500e-03); zero point is %647 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %650 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_442 : tensor<1x256x56x56xf32>) outs(%649 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %647
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %645
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    %cast_451 = tensor.cast %650 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_452 = tensor.cast %cast_451 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %651 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %652 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %653 = torch.aten.item %651 : !torch.vtensor<[],f32> -> !torch.float
    %654 = torch_c.to_f64 %653
    %655 = torch.aten.item %652 : !torch.vtensor<[],si8> -> !torch.int
    %656 = torch_c.to_i64 %655
    %cast_453 = tensor.cast %cast_452 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %c1_454 = arith.constant 1 : index
    %c1_455 = arith.constant 1 : index
    %c256_456 = arith.constant 256 : index
    %c2_457 = arith.constant 2 : index
    %c56_458 = arith.constant 56 : index
    %c3_459 = arith.constant 3 : index
    %c56_460 = arith.constant 56 : index
    %657 = tensor.empty() : tensor<1x256x56x56xf32>
    // Dequantizes %cast_453 elementwise from i8 to f32: (in - zero_point) * scale, with
    // zero point %655 (si8 literal 0) and scale %653 (f32 literal 7.812500e-03).
    %658 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_453 : tensor<1x256x56x56xi8>) outs(%657 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 before subtracting the (i64 -> i32) zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %655
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (f64 -> f32).
      %5779 = torch_c.to_f64 %653
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x56x56xf32>
    %cast_461 = tensor.cast %658 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    %659 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %660 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_462 = torch.constant.int 12
    %661 = torch.aten.item %659 : !torch.vtensor<[],f32> -> !torch.float
    %662 = torch_c.to_f64 %661
    %663 = torch.aten.item %660 : !torch.vtensor<[],si8> -> !torch.int
    %664 = torch_c.to_i64 %663
    %c1_463 = arith.constant 1 : index
    %c0_464 = arith.constant 0 : index
    %c256_465 = arith.constant 256 : index
    %c1_466 = arith.constant 1 : index
    %c64_467 = arith.constant 64 : index
    %665 = tensor.empty() : tensor<256x64x1x1xi8>
    // Quantizes the 256x64x1x1 f32 tensor %18 (defined before this excerpt) to i8:
    // round(in / scale) + zero_point, clamped to [-128, 127].
    // #map4 reads the input with its two trailing (size-1) dimensions pinned to index 0.
    // Scale is %661 (item of the f32 literal 1.562500e-02); zero point is %663 (item of
    // the si8 literal 0).
    // NOTE(review): math.round rounds ties away from zero; confirm that is the rounding
    // mode the source model expects.
    %666 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%18 : tensor<256x64x1x1xf32>) outs(%665 : tensor<256x64x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point converted to f32.
      %5774 = torch_c.to_i64 %663
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale narrowed from f64 to f32.
      %5776 = torch_c.to_f64 %661
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range; the unordered predicates also fire on NaN,
      // which therefore maps to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x64x1x1xi8>
    %cast_468 = tensor.cast %666 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %cast_469 = tensor.cast %cast_468 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    // Dequantize the i8 tensor %cast_469 (256x64x1x1) back to f32 with scale 1.5625e-02 (%667)
    // and zero point 0 (%668); the result is %cast_476.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %667 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %668 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %670 and %672 are not referenced in the visible lines; the region below
    // converts %669 / %671 again on its own.
    %669 = torch.aten.item %667 : !torch.vtensor<[],f32> -> !torch.float
    %670 = torch_c.to_f64 %669
    %671 = torch.aten.item %668 : !torch.vtensor<[],si8> -> !torch.int
    %672 = torch_c.to_i64 %671
    %cast_470 = tensor.cast %cast_469 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_471 = arith.constant 1 : index
    %c0_472 = arith.constant 0 : index
    %c256_473 = arith.constant 256 : index
    %c1_474 = arith.constant 1 : index
    %c64_475 = arith.constant 64 : index
    %673 = tensor.empty() : tensor<256x64x1x1xf32>
    %674 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_470 : tensor<256x64x1x1xi8>) outs(%673 : tensor<256x64x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %671
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %669
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x64x1x1xf32>
    %cast_476 = tensor.cast %674 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
    // Quantize the f32 vector %20 (256 elements, defined outside the visible lines) to i8
    // with scale 3.125e-02 (%675) and zero point 0 (%676); the result is %cast_482.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %675 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %676 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_477 is not referenced in the visible lines.
    %int12_477 = torch.constant.int 12
    // %678 and %680 are not referenced in the visible lines; the region below
    // converts %677 / %679 again on its own.
    %677 = torch.aten.item %675 : !torch.vtensor<[],f32> -> !torch.float
    %678 = torch_c.to_f64 %677
    %679 = torch.aten.item %676 : !torch.vtensor<[],si8> -> !torch.int
    %680 = torch_c.to_i64 %679
    // The index constants below are not referenced in the visible lines.
    %c1_478 = arith.constant 1 : index
    %c0_479 = arith.constant 0 : index
    %c256_480 = arith.constant 256 : index
    %681 = tensor.empty() : tensor<256xi8>
    %682 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%20 : tensor<256xf32>) outs(%681 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %679
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %677
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_481 = tensor.cast %682 : tensor<256xi8> to tensor<256xi8>
    %cast_482 = tensor.cast %cast_481 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the i8 vector %cast_482 (256 elements) back to f32 with scale 3.125e-02 (%683)
    // and zero point 0 (%684); the result is %cast_487.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %683 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %684 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %686 and %688 are not referenced in the visible lines; the region below
    // converts %685 / %687 again on its own.
    %685 = torch.aten.item %683 : !torch.vtensor<[],f32> -> !torch.float
    %686 = torch_c.to_f64 %685
    %687 = torch.aten.item %684 : !torch.vtensor<[],si8> -> !torch.int
    %688 = torch_c.to_i64 %687
    %cast_483 = tensor.cast %cast_482 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_484 = arith.constant 1 : index
    %c0_485 = arith.constant 0 : index
    %c256_486 = arith.constant 256 : index
    %689 = tensor.empty() : tensor<256xf32>
    %690 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_483 : tensor<256xi8>) outs(%689 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %687
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %685
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_487 = tensor.cast %690 : tensor<256xf32> to tensor<256xf32>
    // 1x1 convolution (strides 1, dilations 1, padding 0, group count 1) of %cast_156
    // (1x64x56x56 f32, defined before the visible lines) with the dequantized weights
    // %cast_476 (256x64x1x1) and bias %cast_487 (256). The result %cast_536 is 1x256x56x56 f32.
    //
    // Torch-level constants: %int0_488 / %int0_489 feed the padding amounts and %int1_496 is
    // the group count checked by the asserts below. The four ListConstruct results and
    // %false_495 are not referenced in the visible lines.
    %int0_488 = torch.constant.int 0
    %int0_489 = torch.constant.int 0
    %int1_490 = torch.constant.int 1
    %int1_491 = torch.constant.int 1
    %int1_492 = torch.constant.int 1
    %int1_493 = torch.constant.int 1
    %int0_494 = torch.constant.int 0
    %691 = torch.prim.ListConstruct %int0_488, %int0_489 : (!torch.int, !torch.int) -> !torch.list<int>
    %692 = torch.prim.ListConstruct %int1_490, %int1_491 : (!torch.int, !torch.int) -> !torch.list<int>
    %693 = torch.prim.ListConstruct %int1_492, %int1_493 : (!torch.int, !torch.int) -> !torch.list<int>
    %694 = torch.prim.ListConstruct %int0_494, %int0_494 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_495 = torch.constant.bool false
    %int1_496 = torch.constant.int 1
    // %695 = group count; %696 / %697 = padding amounts. %698 and %699 are not referenced in the visible lines.
    %695 = torch_c.to_i64 %int1_496
    %696 = torch_c.to_i64 %int0_488
    %697 = torch_c.to_i64 %int0_489
    %698 = torch_c.to_i64 %int0_494
    %699 = torch_c.to_i64 %int0_494
    // Of the index constants below, only %c64_500, %c256_506, %c56_502, %c56_504, %c1_510
    // and %c1_512 are used in the visible lines.
    %c0_497 = arith.constant 0 : index
    %c1_498 = arith.constant 1 : index
    %c1_499 = arith.constant 1 : index
    %c64_500 = arith.constant 64 : index
    %c2_501 = arith.constant 2 : index
    %c56_502 = arith.constant 56 : index
    %c3_503 = arith.constant 3 : index
    %c56_504 = arith.constant 56 : index
    %c0_505 = arith.constant 0 : index
    %c256_506 = arith.constant 256 : index
    %c1_507 = arith.constant 1 : index
    %c64_508 = arith.constant 64 : index
    %c2_509 = arith.constant 2 : index
    %c1_510 = arith.constant 1 : index
    %c3_511 = arith.constant 3 : index
    %c1_512 = arith.constant 1 : index
    // Runtime checks: 64 and 256 must each leave remainder 0 when divided by the group count.
    %700 = arith.index_cast %695 : i64 to index
    %c0_513 = arith.constant 0 : index
    %701 = arith.remsi %c64_500, %700 : index
    %702 = arith.cmpi eq, %c0_513, %701 : index
    cf.assert %702, "invalid: groups must divide input channel size evenly."
    %c0_514 = arith.constant 0 : index
    %703 = arith.remsi %c256_506, %700 : index
    %704 = arith.cmpi eq, %c0_514, %703 : index
    cf.assert %704, "invalid: groups must divide weight batch size evenly."
    // In the output-size arithmetic below, %c1_i64_515 / %c1_i64_516 multiply (kernel - 1)
    // and %c1_i64_517 / %c1_i64_518 are the floor-division divisors.
    %c1_i64_515 = arith.constant 1 : i64
    %c1_i64_516 = arith.constant 1 : i64
    %c1_i64_517 = arith.constant 1 : i64
    %c1_i64_518 = arith.constant 1 : i64
    // Padding fill value.
    %cst_519 = arith.constant 0.000000e+00 : f32
    // The index constants %c0_520 .. %c56_527 are not referenced in the visible lines.
    %c0_520 = arith.constant 0 : index
    %c1_521 = arith.constant 1 : index
    %c1_522 = arith.constant 1 : index
    %c64_523 = arith.constant 64 : index
    %c2_524 = arith.constant 2 : index
    %c56_525 = arith.constant 56 : index
    %c3_526 = arith.constant 3 : index
    %c56_527 = arith.constant 56 : index
    // All low/high pad amounts derive from 0-valued constants, so no elements are added;
    // the pad result is typed with fully dynamic dimensions.
    %c0_i64_528 = arith.constant 0 : i64
    %705 = arith.index_cast %c0_i64_528 : i64 to index
    %706 = arith.index_cast %c0_i64_528 : i64 to index
    %707 = arith.index_cast %696 : i64 to index
    %708 = arith.index_cast %697 : i64 to index
    %padded_529 = tensor.pad %cast_156 low[%705, %706, %707, %708] high[%705, %706, %707, %708] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_519 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // First output spatial size (%719):
    //   floordiv(in + 2*pad - d*(k - 1) - 1, s) + 1
    // with in = %c56_502, pad = %696, k = %c1_510, d = %c1_i64_515, s = %c1_i64_517.
    %709 = arith.index_cast %c1_510 : index to i64
    %c1_i64_530 = arith.constant 1 : i64
    %c2_i64_531 = arith.constant 2 : i64
    %710 = arith.muli %696, %c2_i64_531 : i64
    %711 = arith.index_cast %c56_502 : index to i64
    %712 = arith.addi %711, %710 : i64
    %713 = arith.subi %709, %c1_i64_530 : i64
    %714 = arith.muli %c1_i64_515, %713 : i64
    %715 = arith.subi %712, %714 : i64
    %716 = arith.subi %715, %c1_i64_530 : i64
    %717 = arith.floordivsi %716, %c1_i64_517 : i64
    %718 = arith.addi %717, %c1_i64_530 : i64
    %719 = arith.index_cast %718 : i64 to index
    // Second output spatial size (%730): same formula with in = %c56_504, pad = %697,
    // k = %c1_512, d = %c1_i64_516, s = %c1_i64_518.
    %720 = arith.index_cast %c1_512 : index to i64
    %c1_i64_532 = arith.constant 1 : i64
    %c2_i64_533 = arith.constant 2 : i64
    %721 = arith.muli %697, %c2_i64_533 : i64
    %722 = arith.index_cast %c56_504 : index to i64
    %723 = arith.addi %722, %721 : i64
    %724 = arith.subi %720, %c1_i64_532 : i64
    %725 = arith.muli %c1_i64_516, %724 : i64
    %726 = arith.subi %723, %725 : i64
    %727 = arith.subi %726, %c1_i64_532 : i64
    %728 = arith.floordivsi %727, %c1_i64_518 : i64
    %729 = arith.addi %728, %c1_i64_532 : i64
    %730 = arith.index_cast %729 : i64 to index
    // Broadcast the bias over the output: #map3 reads the 1-D input at index d1 only.
    %731 = tensor.empty(%719, %730) : tensor<1x256x?x?xf32>
    %732 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_487 : tensor<256xf32>) outs(%731 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %733, %734, %c0_534 and %c1_535 are not referenced in the visible lines.
    %733 = arith.floordivsi %c64_500, %700 : index
    %734 = arith.floordivsi %c256_506, %700 : index
    %c0_534 = arith.constant 0 : index
    %c1_535 = arith.constant 1 : index
    // The convolution takes the bias-filled tensor %732 as its outs operand.
    %735 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_529, %cast_476 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%732 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Cast the dynamic spatial dimensions back to the static 56x56 shape.
    %cast_536 = tensor.cast %735 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
    // Quantize the f32 tensor %cast_536 (1x256x56x56) to i8 with scale 1.5625e-02 (%736)
    // and zero point 0 (%737); the result is %cast_546.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %736 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %737 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_537 is not referenced in the visible lines.
    %int12_537 = torch.constant.int 12
    // %739 and %741 are not referenced in the visible lines; the region below
    // converts %738 / %740 again on its own.
    %738 = torch.aten.item %736 : !torch.vtensor<[],f32> -> !torch.float
    %739 = torch_c.to_f64 %738
    %740 = torch.aten.item %737 : !torch.vtensor<[],si8> -> !torch.int
    %741 = torch_c.to_i64 %740
    // The index constants below are not referenced in the visible lines.
    %c1_538 = arith.constant 1 : index
    %c1_539 = arith.constant 1 : index
    %c256_540 = arith.constant 256 : index
    %c2_541 = arith.constant 2 : index
    %c56_542 = arith.constant 56 : index
    %c3_543 = arith.constant 3 : index
    %c56_544 = arith.constant 56 : index
    %742 = tensor.empty() : tensor<1x256x56x56xi8>
    %743 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_536 : tensor<1x256x56x56xf32>) outs(%742 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %740
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %738
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_545 = tensor.cast %743 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_546 = tensor.cast %cast_545 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // Dequantize the i8 tensor %cast_546 (1x256x56x56) back to f32 with scale 1.5625e-02 (%744)
    // and zero point 0 (%745); the result is %cast_555.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %744 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %745 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %747 and %749 are not referenced in the visible lines; the region below
    // converts %746 / %748 again on its own.
    %746 = torch.aten.item %744 : !torch.vtensor<[],f32> -> !torch.float
    %747 = torch_c.to_f64 %746
    %748 = torch.aten.item %745 : !torch.vtensor<[],si8> -> !torch.int
    %749 = torch_c.to_i64 %748
    %cast_547 = tensor.cast %cast_546 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_548 = arith.constant 1 : index
    %c1_549 = arith.constant 1 : index
    %c256_550 = arith.constant 256 : index
    %c2_551 = arith.constant 2 : index
    %c56_552 = arith.constant 56 : index
    %c3_553 = arith.constant 3 : index
    %c56_554 = arith.constant 56 : index
    %750 = tensor.empty() : tensor<1x256x56x56xf32>
    %751 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_547 : tensor<1x256x56x56xi8>) outs(%750 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %748
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %746
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x56x56xf32>
    %cast_555 = tensor.cast %751 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Element-wise add of %cast_461 and %cast_555 (both 1x256x56x56 f32); the result is %cast_570.
    // Per element: out = in + in_6197 * sitofp(%752), where %752 is the integer constant 1.
    %int1_556 = torch.constant.int 1
    %752 = torch_c.to_i64 %int1_556
    // Of the index constants in this step, only the 256 and 56 values are used, by the
    // size-equality asserts below.
    %c1_557 = arith.constant 1 : index
    %c1_558 = arith.constant 1 : index
    %c256_559 = arith.constant 256 : index
    %c2_560 = arith.constant 2 : index
    %c56_561 = arith.constant 56 : index
    %c3_562 = arith.constant 3 : index
    %c56_563 = arith.constant 56 : index
    %c1_564 = arith.constant 1 : index
    %c256_565 = arith.constant 256 : index
    // Each assert compares two constants of equal value (256 vs 256, 56 vs 56, 56 vs 56).
    %753 = arith.cmpi eq, %c256_559, %c256_565 : index
    cf.assert %753, "mismatched size for broadcast"
    %c2_566 = arith.constant 2 : index
    %c56_567 = arith.constant 56 : index
    %754 = arith.cmpi eq, %c56_561, %c56_567 : index
    cf.assert %754, "mismatched size for broadcast"
    %c3_568 = arith.constant 3 : index
    %c56_569 = arith.constant 56 : index
    %755 = arith.cmpi eq, %c56_563, %c56_569 : index
    cf.assert %755, "mismatched size for broadcast"
    %756 = tensor.empty() : tensor<1x256x56x56xf32>
    // Both inputs use #map, which pins the leading index to 0; that dimension has size 1 here.
    %757 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_461, %cast_555 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%756 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by the integer factor, then add the first operand.
      %5774 = arith.sitofp %752 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x256x56x56xf32>
    %cast_570 = tensor.cast %757 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // ReLU over %cast_570 (1x256x56x56 f32); the result is %cast_578.
    // Per element: out = in when (in > 0.0 or the compare is unordered), otherwise 0.0.
    // The index constants below are not referenced in the visible lines.
    %c1_571 = arith.constant 1 : index
    %c1_572 = arith.constant 1 : index
    %c256_573 = arith.constant 256 : index
    %c2_574 = arith.constant 2 : index
    %c56_575 = arith.constant 56 : index
    %c3_576 = arith.constant 3 : index
    %c56_577 = arith.constant 56 : index
    %758 = tensor.empty() : tensor<1x256x56x56xf32>
    %759 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_570 : tensor<1x256x56x56xf32>) outs(%758 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so a NaN input selects %in and is passed through.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x56x56xf32>
    %cast_578 = tensor.cast %759 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Quantize the f32 tensor %cast_578 (1x256x56x56) to i8 with scale 1.5625e-02 (%760)
    // and zero point 0 (%761); the result is %cast_588.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %760 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %761 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_579 is not referenced in the visible lines.
    %int12_579 = torch.constant.int 12
    // %763 and %765 are not referenced in the visible lines; the region below
    // converts %762 / %764 again on its own.
    %762 = torch.aten.item %760 : !torch.vtensor<[],f32> -> !torch.float
    %763 = torch_c.to_f64 %762
    %764 = torch.aten.item %761 : !torch.vtensor<[],si8> -> !torch.int
    %765 = torch_c.to_i64 %764
    // The index constants below are not referenced in the visible lines.
    %c1_580 = arith.constant 1 : index
    %c1_581 = arith.constant 1 : index
    %c256_582 = arith.constant 256 : index
    %c2_583 = arith.constant 2 : index
    %c56_584 = arith.constant 56 : index
    %c3_585 = arith.constant 3 : index
    %c56_586 = arith.constant 56 : index
    %766 = tensor.empty() : tensor<1x256x56x56xi8>
    %767 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_578 : tensor<1x256x56x56xf32>) outs(%766 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %764
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %762
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_587 = tensor.cast %767 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_588 = tensor.cast %cast_587 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // Dequantize the i8 tensor %cast_588 (1x256x56x56) back to f32 with scale 1.5625e-02 (%768)
    // and zero point 0 (%769); the result is %cast_597.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %768 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %769 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %771 and %773 are not referenced in the visible lines; the region below
    // converts %770 / %772 again on its own.
    %770 = torch.aten.item %768 : !torch.vtensor<[],f32> -> !torch.float
    %771 = torch_c.to_f64 %770
    %772 = torch.aten.item %769 : !torch.vtensor<[],si8> -> !torch.int
    %773 = torch_c.to_i64 %772
    %cast_589 = tensor.cast %cast_588 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_590 = arith.constant 1 : index
    %c1_591 = arith.constant 1 : index
    %c256_592 = arith.constant 256 : index
    %c2_593 = arith.constant 2 : index
    %c56_594 = arith.constant 56 : index
    %c3_595 = arith.constant 3 : index
    %c56_596 = arith.constant 56 : index
    %774 = tensor.empty() : tensor<1x256x56x56xf32>
    %775 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_589 : tensor<1x256x56x56xi8>) outs(%774 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %772
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %770
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x56x56xf32>
    %cast_597 = tensor.cast %775 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Quantize the f32 tensor %22 (64x256x1x1, defined outside the visible lines) to i8
    // with scale 3.90625e-03 (%776) and zero point 0 (%777); the result is %cast_605.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %776 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %777 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_598 is not referenced in the visible lines.
    %int12_598 = torch.constant.int 12
    // %779 and %781 are not referenced in the visible lines; the region below
    // converts %778 / %780 again on its own.
    %778 = torch.aten.item %776 : !torch.vtensor<[],f32> -> !torch.float
    %779 = torch_c.to_f64 %778
    %780 = torch.aten.item %777 : !torch.vtensor<[],si8> -> !torch.int
    %781 = torch_c.to_i64 %780
    // The index constants below are not referenced in the visible lines.
    %c1_599 = arith.constant 1 : index
    %c0_600 = arith.constant 0 : index
    %c64_601 = arith.constant 64 : index
    %c1_602 = arith.constant 1 : index
    %c256_603 = arith.constant 256 : index
    %782 = tensor.empty() : tensor<64x256x1x1xi8>
    %783 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%22 : tensor<64x256x1x1xf32>) outs(%782 : tensor<64x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %780
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %778
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x256x1x1xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_604 = tensor.cast %783 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    %cast_605 = tensor.cast %cast_604 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    // Dequantize the i8 tensor %cast_605 (64x256x1x1) back to f32 with scale 3.90625e-03 (%784)
    // and zero point 0 (%785); the result is %cast_612.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %784 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %785 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %787 and %789 are not referenced in the visible lines; the region below
    // converts %786 / %788 again on its own.
    %786 = torch.aten.item %784 : !torch.vtensor<[],f32> -> !torch.float
    %787 = torch_c.to_f64 %786
    %788 = torch.aten.item %785 : !torch.vtensor<[],si8> -> !torch.int
    %789 = torch_c.to_i64 %788
    %cast_606 = tensor.cast %cast_605 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_607 = arith.constant 1 : index
    %c0_608 = arith.constant 0 : index
    %c64_609 = arith.constant 64 : index
    %c1_610 = arith.constant 1 : index
    %c256_611 = arith.constant 256 : index
    %790 = tensor.empty() : tensor<64x256x1x1xf32>
    %791 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_606 : tensor<64x256x1x1xi8>) outs(%790 : tensor<64x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %788
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %786
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64x256x1x1xf32>
    %cast_612 = tensor.cast %791 : tensor<64x256x1x1xf32> to tensor<64x256x1x1xf32>
    // Quantize the f32 vector %24 (64 elements, defined outside the visible lines) to i8
    // with scale 7.8125e-03 (%792) and zero point 0 (%793); the result is %cast_618.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %792 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %793 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_613 is not referenced in the visible lines.
    %int12_613 = torch.constant.int 12
    // %795 and %797 are not referenced in the visible lines; the region below
    // converts %794 / %796 again on its own.
    %794 = torch.aten.item %792 : !torch.vtensor<[],f32> -> !torch.float
    %795 = torch_c.to_f64 %794
    %796 = torch.aten.item %793 : !torch.vtensor<[],si8> -> !torch.int
    %797 = torch_c.to_i64 %796
    // The index constants below are not referenced in the visible lines.
    %c1_614 = arith.constant 1 : index
    %c0_615 = arith.constant 0 : index
    %c64_616 = arith.constant 64 : index
    %798 = tensor.empty() : tensor<64xi8>
    %799 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%24 : tensor<64xf32>) outs(%798 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %796
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %794
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_617 = tensor.cast %799 : tensor<64xi8> to tensor<64xi8>
    %cast_618 = tensor.cast %cast_617 : tensor<64xi8> to tensor<64xi8>
    // Dequantize the i8 vector %cast_618 (64 elements) back to f32 with scale 7.8125e-03 (%800)
    // and zero point 0 (%801); the result is %cast_623.
    // Per element: out = sitofp(extsi(in) - trunci(zp)) * truncf(scale).
    %800 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %801 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %803 and %805 are not referenced in the visible lines; the region below
    // converts %802 / %804 again on its own.
    %802 = torch.aten.item %800 : !torch.vtensor<[],f32> -> !torch.float
    %803 = torch_c.to_f64 %802
    %804 = torch.aten.item %801 : !torch.vtensor<[],si8> -> !torch.int
    %805 = torch_c.to_i64 %804
    %cast_619 = tensor.cast %cast_618 : tensor<64xi8> to tensor<64xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_620 = arith.constant 1 : index
    %c0_621 = arith.constant 0 : index
    %c64_622 = arith.constant 64 : index
    %806 = tensor.empty() : tensor<64xf32>
    %807 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_619 : tensor<64xi8>) outs(%806 : tensor<64xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 sample and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %804
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale, narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %802
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64xf32>
    %cast_623 = tensor.cast %807 : tensor<64xf32> to tensor<64xf32>
    // 1x1 convolution (strides 1, dilations 1, padding 0, group count 1) of %cast_597
    // (1x256x56x56 f32) with the dequantized weights %cast_612 (64x256x1x1) and bias
    // %cast_623 (64). The result %cast_672 is 1x64x56x56 f32.
    //
    // Torch-level constants: %int0_624 / %int0_625 feed the padding amounts and %int1_632 is
    // the group count checked by the asserts below. The four ListConstruct results and
    // %false_631 are not referenced in the visible lines.
    %int0_624 = torch.constant.int 0
    %int0_625 = torch.constant.int 0
    %int1_626 = torch.constant.int 1
    %int1_627 = torch.constant.int 1
    %int1_628 = torch.constant.int 1
    %int1_629 = torch.constant.int 1
    %int0_630 = torch.constant.int 0
    %808 = torch.prim.ListConstruct %int0_624, %int0_625 : (!torch.int, !torch.int) -> !torch.list<int>
    %809 = torch.prim.ListConstruct %int1_626, %int1_627 : (!torch.int, !torch.int) -> !torch.list<int>
    %810 = torch.prim.ListConstruct %int1_628, %int1_629 : (!torch.int, !torch.int) -> !torch.list<int>
    %811 = torch.prim.ListConstruct %int0_630, %int0_630 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_631 = torch.constant.bool false
    %int1_632 = torch.constant.int 1
    // %812 = group count; %813 / %814 = padding amounts. %815 and %816 are not referenced in the visible lines.
    %812 = torch_c.to_i64 %int1_632
    %813 = torch_c.to_i64 %int0_624
    %814 = torch_c.to_i64 %int0_625
    %815 = torch_c.to_i64 %int0_630
    %816 = torch_c.to_i64 %int0_630
    // Of the index constants below, only %c256_636, %c64_642, %c56_638, %c56_640, %c1_646
    // and %c1_648 are used in the visible lines.
    %c0_633 = arith.constant 0 : index
    %c1_634 = arith.constant 1 : index
    %c1_635 = arith.constant 1 : index
    %c256_636 = arith.constant 256 : index
    %c2_637 = arith.constant 2 : index
    %c56_638 = arith.constant 56 : index
    %c3_639 = arith.constant 3 : index
    %c56_640 = arith.constant 56 : index
    %c0_641 = arith.constant 0 : index
    %c64_642 = arith.constant 64 : index
    %c1_643 = arith.constant 1 : index
    %c256_644 = arith.constant 256 : index
    %c2_645 = arith.constant 2 : index
    %c1_646 = arith.constant 1 : index
    %c3_647 = arith.constant 3 : index
    %c1_648 = arith.constant 1 : index
    // Runtime checks: 256 and 64 must each leave remainder 0 when divided by the group count.
    %817 = arith.index_cast %812 : i64 to index
    %c0_649 = arith.constant 0 : index
    %818 = arith.remsi %c256_636, %817 : index
    %819 = arith.cmpi eq, %c0_649, %818 : index
    cf.assert %819, "invalid: groups must divide input channel size evenly."
    %c0_650 = arith.constant 0 : index
    %820 = arith.remsi %c64_642, %817 : index
    %821 = arith.cmpi eq, %c0_650, %820 : index
    cf.assert %821, "invalid: groups must divide weight batch size evenly."
    // In the output-size arithmetic below, %c1_i64_651 / %c1_i64_652 multiply (kernel - 1)
    // and %c1_i64_653 / %c1_i64_654 are the floor-division divisors.
    %c1_i64_651 = arith.constant 1 : i64
    %c1_i64_652 = arith.constant 1 : i64
    %c1_i64_653 = arith.constant 1 : i64
    %c1_i64_654 = arith.constant 1 : i64
    // Padding fill value.
    %cst_655 = arith.constant 0.000000e+00 : f32
    // The index constants %c0_656 .. %c56_663 are not referenced in the visible lines.
    %c0_656 = arith.constant 0 : index
    %c1_657 = arith.constant 1 : index
    %c1_658 = arith.constant 1 : index
    %c256_659 = arith.constant 256 : index
    %c2_660 = arith.constant 2 : index
    %c56_661 = arith.constant 56 : index
    %c3_662 = arith.constant 3 : index
    %c56_663 = arith.constant 56 : index
    // All low/high pad amounts derive from 0-valued constants, so no elements are added;
    // the pad result is typed with fully dynamic dimensions.
    %c0_i64_664 = arith.constant 0 : i64
    %822 = arith.index_cast %c0_i64_664 : i64 to index
    %823 = arith.index_cast %c0_i64_664 : i64 to index
    %824 = arith.index_cast %813 : i64 to index
    %825 = arith.index_cast %814 : i64 to index
    %padded_665 = tensor.pad %cast_597 low[%822, %823, %824, %825] high[%822, %823, %824, %825] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_655 : f32
    } : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
    // First output spatial size (%836):
    //   floordiv(in + 2*pad - d*(k - 1) - 1, s) + 1
    // with in = %c56_638, pad = %813, k = %c1_646, d = %c1_i64_651, s = %c1_i64_653.
    %826 = arith.index_cast %c1_646 : index to i64
    %c1_i64_666 = arith.constant 1 : i64
    %c2_i64_667 = arith.constant 2 : i64
    %827 = arith.muli %813, %c2_i64_667 : i64
    %828 = arith.index_cast %c56_638 : index to i64
    %829 = arith.addi %828, %827 : i64
    %830 = arith.subi %826, %c1_i64_666 : i64
    %831 = arith.muli %c1_i64_651, %830 : i64
    %832 = arith.subi %829, %831 : i64
    %833 = arith.subi %832, %c1_i64_666 : i64
    %834 = arith.floordivsi %833, %c1_i64_653 : i64
    %835 = arith.addi %834, %c1_i64_666 : i64
    %836 = arith.index_cast %835 : i64 to index
    // Second output spatial size (%847): same formula with in = %c56_640, pad = %814,
    // k = %c1_648, d = %c1_i64_652, s = %c1_i64_654.
    %837 = arith.index_cast %c1_648 : index to i64
    %c1_i64_668 = arith.constant 1 : i64
    %c2_i64_669 = arith.constant 2 : i64
    %838 = arith.muli %814, %c2_i64_669 : i64
    %839 = arith.index_cast %c56_640 : index to i64
    %840 = arith.addi %839, %838 : i64
    %841 = arith.subi %837, %c1_i64_668 : i64
    %842 = arith.muli %c1_i64_652, %841 : i64
    %843 = arith.subi %840, %842 : i64
    %844 = arith.subi %843, %c1_i64_668 : i64
    %845 = arith.floordivsi %844, %c1_i64_654 : i64
    %846 = arith.addi %845, %c1_i64_668 : i64
    %847 = arith.index_cast %846 : i64 to index
    // Broadcast the bias over the output: #map3 reads the 1-D input at index d1 only.
    %848 = tensor.empty(%836, %847) : tensor<1x64x?x?xf32>
    %849 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_623 : tensor<64xf32>) outs(%848 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // %850, %851, %c0_670 and %c1_671 are not referenced in the visible lines.
    %850 = arith.floordivsi %c256_636, %817 : index
    %851 = arith.floordivsi %c64_642, %817 : index
    %c0_670 = arith.constant 0 : index
    %c1_671 = arith.constant 1 : index
    // The convolution takes the bias-filled tensor %849 as its outs operand.
    %852 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_665, %cast_612 : tensor<?x?x?x?xf32>, tensor<64x256x1x1xf32>) outs(%849 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Cast the dynamic spatial dimensions back to the static 56x56 shape.
    %cast_672 = tensor.cast %852 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    // ReLU over %cast_672 (1x64x56x56 f32); the result is %cast_680.
    // Per element: out = in when (in > 0.0 or the compare is unordered), otherwise 0.0.
    // The index constants below are not referenced in the visible lines.
    %c1_673 = arith.constant 1 : index
    %c1_674 = arith.constant 1 : index
    %c64_675 = arith.constant 64 : index
    %c2_676 = arith.constant 2 : index
    %c56_677 = arith.constant 56 : index
    %c3_678 = arith.constant 3 : index
    %c56_679 = arith.constant 56 : index
    %853 = tensor.empty() : tensor<1x64x56x56xf32>
    %854 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_672 : tensor<1x64x56x56xf32>) outs(%853 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so a NaN input selects %in and is passed through.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x56x56xf32>
    %cast_680 = tensor.cast %854 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the f32 tensor %cast_680 (1x64x56x56) to i8 with scale 1.5625e-02 (%855)
    // and zero point 0 (%856); the result is %cast_690.
    // Per element: q = fptosi(clamp(round(in / scale) + zp, -128.0, 127.0)).
    %855 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %856 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_681 is not referenced in the visible lines.
    %int12_681 = torch.constant.int 12
    // %858 and %860 are not referenced in the visible lines; the region below
    // converts %857 / %859 again on its own.
    %857 = torch.aten.item %855 : !torch.vtensor<[],f32> -> !torch.float
    %858 = torch_c.to_f64 %857
    %859 = torch.aten.item %856 : !torch.vtensor<[],si8> -> !torch.int
    %860 = torch_c.to_i64 %859
    // The index constants below are not referenced in the visible lines.
    %c1_682 = arith.constant 1 : index
    %c1_683 = arith.constant 1 : index
    %c64_684 = arith.constant 64 : index
    %c2_685 = arith.constant 2 : index
    %c56_686 = arith.constant 56 : index
    %c3_687 = arith.constant 3 : index
    %c56_688 = arith.constant 56 : index
    %861 = tensor.empty() : tensor<1x64x56x56xi8>
    %862 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_680 : tensor<1x64x56x56xf32>) outs(%861 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %859
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %857
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // NOTE(review): math.round rounds halfway cases away from zero, while ONNX
      // QuantizeLinear specifies round-half-to-even; confirm which rounding the source model expects.
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares use unordered predicates (ult/ugt) and both
      // test the unclamped %5780, so a NaN sum ends up selecting %cst_6198.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    // Two same-type tensor.cast ops: source and result types are identical.
    %cast_689 = tensor.cast %862 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_690 = tensor.cast %cast_689 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // Dequantize %cast_690 (1x64x56x56 i8) back to f32 with scale 1.5625e-02 (= 1/64) and
    // zero point 0.
    %863 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %864 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %865 = torch.aten.item %863 : !torch.vtensor<[],f32> -> !torch.float
    // %866 and %868 are not referenced in the visible lines; the region below converts
    // %865 / %867 again on its own.
    %866 = torch_c.to_f64 %865
    %867 = torch.aten.item %864 : !torch.vtensor<[],si8> -> !torch.int
    %868 = torch_c.to_i64 %867
    // Cast between identical types (no shape change).
    %cast_691 = tensor.cast %cast_690 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // The seven index constants below are not referenced in the visible lines.
    %c1_692 = arith.constant 1 : index
    %c1_693 = arith.constant 1 : index
    %c64_694 = arith.constant 64 : index
    %c2_695 = arith.constant 2 : index
    %c56_696 = arith.constant 56 : index
    %c3_697 = arith.constant 3 : index
    %c56_698 = arith.constant 56 : index
    %869 = tensor.empty() : tensor<1x64x56x56xf32>
    %870 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_691 : tensor<1x64x56x56xi8>) outs(%869 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %867
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %865
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    // Cast between identical types; %cast_699 is the input of the tensor.pad that feeds the
    // 3x3 convolution further down.
    %cast_699 = tensor.cast %870 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the 64x64x3x3 f32 tensor %26 (defined above this excerpt) to i8 with scale
    // 3.90625e-03 (= 1/256) and zero point 0.
    %871 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %872 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_700 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_700 = torch.constant.int 12
    %873 = torch.aten.item %871 : !torch.vtensor<[],f32> -> !torch.float
    // %874 and %876 are not referenced in the visible lines; the region below converts
    // %873 / %875 again on its own.
    %874 = torch_c.to_f64 %873
    %875 = torch.aten.item %872 : !torch.vtensor<[],si8> -> !torch.int
    %876 = torch_c.to_i64 %875
    // The nine index constants below are not referenced in the visible lines.
    %c1_701 = arith.constant 1 : index
    %c0_702 = arith.constant 0 : index
    %c64_703 = arith.constant 64 : index
    %c1_704 = arith.constant 1 : index
    %c64_705 = arith.constant 64 : index
    %c2_706 = arith.constant 2 : index
    %c3_707 = arith.constant 3 : index
    %c3_708 = arith.constant 3 : index
    %c3_709 = arith.constant 3 : index
    %877 = tensor.empty() : tensor<64x64x3x3xi8>
    %878 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%26 : tensor<64x64x3x3xf32>) outs(%877 : tensor<64x64x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %875
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %873
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x64x3x3xi8>
    // Both casts are between identical types (no shape change).
    %cast_710 = tensor.cast %878 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %cast_711 = tensor.cast %cast_710 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    // Dequantize %cast_711 (64x64x3x3 i8) back to f32 with scale 3.90625e-03 (= 1/256) and
    // zero point 0.
    %879 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %880 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %881 = torch.aten.item %879 : !torch.vtensor<[],f32> -> !torch.float
    // %882 and %884 are not referenced in the visible lines; the region below converts
    // %881 / %883 again on its own.
    %882 = torch_c.to_f64 %881
    %883 = torch.aten.item %880 : !torch.vtensor<[],si8> -> !torch.int
    %884 = torch_c.to_i64 %883
    // Cast between identical types (no shape change).
    %cast_712 = tensor.cast %cast_711 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    // The nine index constants below are not referenced in the visible lines.
    %c1_713 = arith.constant 1 : index
    %c0_714 = arith.constant 0 : index
    %c64_715 = arith.constant 64 : index
    %c1_716 = arith.constant 1 : index
    %c64_717 = arith.constant 64 : index
    %c2_718 = arith.constant 2 : index
    %c3_719 = arith.constant 3 : index
    %c3_720 = arith.constant 3 : index
    %c3_721 = arith.constant 3 : index
    %885 = tensor.empty() : tensor<64x64x3x3xf32>
    %886 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_712 : tensor<64x64x3x3xi8>) outs(%885 : tensor<64x64x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %883
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %881
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64x64x3x3xf32>
    // Cast between identical types; %cast_722 is the filter operand of the 3x3 convolution
    // further down.
    %cast_722 = tensor.cast %886 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
    // Quantize the 64-element f32 tensor %28 (defined above this excerpt) to i8 with scale
    // 7.8125e-03 (= 1/128) and zero point 0.
    %887 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %888 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_723 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_723 = torch.constant.int 12
    %889 = torch.aten.item %887 : !torch.vtensor<[],f32> -> !torch.float
    // %890 and %892 are not referenced in the visible lines; the region below converts
    // %889 / %891 again on its own.
    %890 = torch_c.to_f64 %889
    %891 = torch.aten.item %888 : !torch.vtensor<[],si8> -> !torch.int
    %892 = torch_c.to_i64 %891
    // The three index constants below are not referenced in the visible lines.
    %c1_724 = arith.constant 1 : index
    %c0_725 = arith.constant 0 : index
    %c64_726 = arith.constant 64 : index
    %893 = tensor.empty() : tensor<64xi8>
    %894 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%28 : tensor<64xf32>) outs(%893 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %891
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %889
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    // Both casts are between identical types (no shape change).
    %cast_727 = tensor.cast %894 : tensor<64xi8> to tensor<64xi8>
    %cast_728 = tensor.cast %cast_727 : tensor<64xi8> to tensor<64xi8>
    // Dequantize %cast_728 (64 x i8) back to f32 with scale 7.8125e-03 (= 1/128) and zero point 0.
    %895 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %896 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %897 = torch.aten.item %895 : !torch.vtensor<[],f32> -> !torch.float
    // %898 and %900 are not referenced in the visible lines; the region below converts
    // %897 / %899 again on its own.
    %898 = torch_c.to_f64 %897
    %899 = torch.aten.item %896 : !torch.vtensor<[],si8> -> !torch.int
    %900 = torch_c.to_i64 %899
    // Cast between identical types (no shape change).
    %cast_729 = tensor.cast %cast_728 : tensor<64xi8> to tensor<64xi8>
    // The three index constants below are not referenced in the visible lines.
    %c1_730 = arith.constant 1 : index
    %c0_731 = arith.constant 0 : index
    %c64_732 = arith.constant 64 : index
    %901 = tensor.empty() : tensor<64xf32>
    %902 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_729 : tensor<64xi8>) outs(%901 : tensor<64xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %899
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %897
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<64xf32>
    // Cast between identical types; %cast_733 is broadcast into the accumulator of the 3x3
    // convolution further down.
    %cast_733 = tensor.cast %902 : tensor<64xf32> to tensor<64xf32>
    // 3x3 convolution of %cast_699 (1x64x56x56) with filter %cast_722 (64x64x3x3), seeded with
    // the broadcast of %cast_733, producing 1x64x56x56.
    //
    // Scalar parameters. %int1_734 / %int1_735 become the spatial padding (%908 / %909) and
    // %int1_742 becomes the group count (%907 -> %912). The list values %903-%906, %false_741,
    // %910 and %911 are not referenced in the visible lines.
    %int1_734 = torch.constant.int 1
    %int1_735 = torch.constant.int 1
    %int1_736 = torch.constant.int 1
    %int1_737 = torch.constant.int 1
    %int1_738 = torch.constant.int 1
    %int1_739 = torch.constant.int 1
    %int0_740 = torch.constant.int 0
    %903 = torch.prim.ListConstruct %int1_734, %int1_735 : (!torch.int, !torch.int) -> !torch.list<int>
    %904 = torch.prim.ListConstruct %int1_736, %int1_737 : (!torch.int, !torch.int) -> !torch.list<int>
    %905 = torch.prim.ListConstruct %int1_738, %int1_739 : (!torch.int, !torch.int) -> !torch.list<int>
    %906 = torch.prim.ListConstruct %int0_740, %int0_740 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_741 = torch.constant.bool false
    %int1_742 = torch.constant.int 1
    %907 = torch_c.to_i64 %int1_742
    %908 = torch_c.to_i64 %int1_734
    %909 = torch_c.to_i64 %int1_735
    %910 = torch_c.to_i64 %int0_740
    %911 = torch_c.to_i64 %int0_740
    // Index constants mirroring the input dims (1, 64, 56, 56) and the filter dims
    // (64, 64, 3, 3). Of these, only %c64_746, %c56_748, %c56_750, %c64_752, %c3_756 and
    // %c3_758 are used below.
    %c0_743 = arith.constant 0 : index
    %c1_744 = arith.constant 1 : index
    %c1_745 = arith.constant 1 : index
    %c64_746 = arith.constant 64 : index
    %c2_747 = arith.constant 2 : index
    %c56_748 = arith.constant 56 : index
    %c3_749 = arith.constant 3 : index
    %c56_750 = arith.constant 56 : index
    %c0_751 = arith.constant 0 : index
    %c64_752 = arith.constant 64 : index
    %c1_753 = arith.constant 1 : index
    %c64_754 = arith.constant 64 : index
    %c2_755 = arith.constant 2 : index
    %c3_756 = arith.constant 3 : index
    %c3_757 = arith.constant 3 : index
    %c3_758 = arith.constant 3 : index
    // Runtime checks: 64 (from %c64_746) and 64 (from %c64_752) must both be divisible by the
    // group count %912.
    %912 = arith.index_cast %907 : i64 to index
    %c0_759 = arith.constant 0 : index
    %913 = arith.remsi %c64_746, %912 : index
    %914 = arith.cmpi eq, %c0_759, %913 : index
    cf.assert %914, "invalid: groups must divide input channel size evenly."
    %c0_760 = arith.constant 0 : index
    %915 = arith.remsi %c64_752, %912 : index
    %916 = arith.cmpi eq, %c0_760, %915 : index
    cf.assert %916, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_761 / %c1_i64_762 multiply (kernel - 1) and %c1_i64_763 / %c1_i64_764 are the
    // divisors in the output-size arithmetic below (presumably dilation and stride; they match
    // the dense<1> attributes on the conv op -- confirm).
    %c1_i64_761 = arith.constant 1 : i64
    %c1_i64_762 = arith.constant 1 : i64
    %c1_i64_763 = arith.constant 1 : i64
    %c1_i64_764 = arith.constant 1 : i64
    // Padding fill value.
    %cst_765 = arith.constant 0.000000e+00 : f32
    // The eight index constants below are not referenced in the visible lines.
    %c0_766 = arith.constant 0 : index
    %c1_767 = arith.constant 1 : index
    %c1_768 = arith.constant 1 : index
    %c64_769 = arith.constant 64 : index
    %c2_770 = arith.constant 2 : index
    %c56_771 = arith.constant 56 : index
    %c3_772 = arith.constant 3 : index
    %c56_773 = arith.constant 56 : index
    // Pad the two trailing (spatial) dims by %908 / %909 on both sides with 0.0; the two
    // leading dims get zero padding. The result type is fully dynamic.
    %c0_i64_774 = arith.constant 0 : i64
    %917 = arith.index_cast %c0_i64_774 : i64 to index
    %918 = arith.index_cast %c0_i64_774 : i64 to index
    %919 = arith.index_cast %908 : i64 to index
    %920 = arith.index_cast %909 : i64 to index
    %padded_775 = tensor.pad %cast_699 low[%917, %918, %919, %920] high[%917, %918, %919, %920] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_765 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output size for dim 2:
    //   floordiv(56 + 2*%908 - %c1_i64_761*(3 - 1) - 1, %c1_i64_763) + 1
    // With the constants above this evaluates to 56.
    %921 = arith.index_cast %c3_756 : index to i64
    %c1_i64_776 = arith.constant 1 : i64
    %c2_i64_777 = arith.constant 2 : i64
    %922 = arith.muli %908, %c2_i64_777 : i64
    %923 = arith.index_cast %c56_748 : index to i64
    %924 = arith.addi %923, %922 : i64
    %925 = arith.subi %921, %c1_i64_776 : i64
    %926 = arith.muli %c1_i64_761, %925 : i64
    %927 = arith.subi %924, %926 : i64
    %928 = arith.subi %927, %c1_i64_776 : i64
    %929 = arith.floordivsi %928, %c1_i64_763 : i64
    %930 = arith.addi %929, %c1_i64_776 : i64
    %931 = arith.index_cast %930 : i64 to index
    // Same computation for dim 3, using %909, %c1_i64_762 and %c1_i64_764.
    %932 = arith.index_cast %c3_758 : index to i64
    %c1_i64_778 = arith.constant 1 : i64
    %c2_i64_779 = arith.constant 2 : i64
    %933 = arith.muli %909, %c2_i64_779 : i64
    %934 = arith.index_cast %c56_750 : index to i64
    %935 = arith.addi %934, %933 : i64
    %936 = arith.subi %932, %c1_i64_778 : i64
    %937 = arith.muli %c1_i64_762, %936 : i64
    %938 = arith.subi %935, %937 : i64
    %939 = arith.subi %938, %c1_i64_778 : i64
    %940 = arith.floordivsi %939, %c1_i64_764 : i64
    %941 = arith.addi %940, %c1_i64_778 : i64
    %942 = arith.index_cast %941 : i64 to index
    // Allocate the 1x64x?x? output and fill it by broadcasting %cast_733 along the channel
    // dimension (d1, via #map3); this becomes the initial accumulator of the convolution.
    %943 = tensor.empty(%931, %942) : tensor<1x64x?x?xf32>
    %944 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_733 : tensor<64xf32>) outs(%943 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // %945, %946, %c0_780 and %c1_781 are not referenced in the visible lines.
    %945 = arith.floordivsi %c64_746, %912 : index
    %946 = arith.floordivsi %c64_752, %912 : index
    %c0_780 = arith.constant 0 : index
    %c1_781 = arith.constant 1 : index
    // Stride-1, dilation-1 NCHW x FCHW convolution accumulating into %944, then a cast of the
    // dynamic result to the static shape 1x64x56x56.
    %947 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_775, %cast_722 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%944 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    %cast_782 = tensor.cast %947 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    // Elementwise ReLU over %cast_782 (1x64x56x56 f32).
    // The seven index constants below are not referenced in the visible lines.
    %c1_783 = arith.constant 1 : index
    %c1_784 = arith.constant 1 : index
    %c64_785 = arith.constant 64 : index
    %c2_786 = arith.constant 2 : index
    %c56_787 = arith.constant 56 : index
    %c3_788 = arith.constant 3 : index
    %c56_789 = arith.constant 56 : index
    %948 = tensor.empty() : tensor<1x64x56x56xf32>
    %949 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_782 : tensor<1x64x56x56xf32>) outs(%948 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      // Yield %in when (%in > 0.0) and 0.0 otherwise. `ugt` is the unordered predicate, so a
      // NaN input also selects %in.
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x56x56xf32>
    // Cast between identical types (no shape change).
    %cast_790 = tensor.cast %949 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize %cast_790 (1x64x56x56 f32) to i8 with scale 1.5625e-02 (= 1/64) and zero point 0.
    %950 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %951 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_791 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_791 = torch.constant.int 12
    %952 = torch.aten.item %950 : !torch.vtensor<[],f32> -> !torch.float
    // %953 and %955 are not referenced in the visible lines; the region below converts
    // %952 / %954 again on its own.
    %953 = torch_c.to_f64 %952
    %954 = torch.aten.item %951 : !torch.vtensor<[],si8> -> !torch.int
    %955 = torch_c.to_i64 %954
    // The seven index constants below are not referenced in the visible lines.
    %c1_792 = arith.constant 1 : index
    %c1_793 = arith.constant 1 : index
    %c64_794 = arith.constant 64 : index
    %c2_795 = arith.constant 2 : index
    %c56_796 = arith.constant 56 : index
    %c3_797 = arith.constant 3 : index
    %c56_798 = arith.constant 56 : index
    %956 = tensor.empty() : tensor<1x64x56x56xi8>
    %957 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_790 : tensor<1x64x56x56xf32>) outs(%956 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %954
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %952
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    // Both casts are between identical types (no shape change).
    %cast_799 = tensor.cast %957 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_800 = tensor.cast %cast_799 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // Dequantize %cast_800 (1x64x56x56 i8) back to f32 with scale 1.5625e-02 (= 1/64) and
    // zero point 0.
    %958 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %959 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %960 = torch.aten.item %958 : !torch.vtensor<[],f32> -> !torch.float
    // %961 and %963 are not referenced in the visible lines; the region below converts
    // %960 / %962 again on its own.
    %961 = torch_c.to_f64 %960
    %962 = torch.aten.item %959 : !torch.vtensor<[],si8> -> !torch.int
    %963 = torch_c.to_i64 %962
    // Cast between identical types (no shape change).
    %cast_801 = tensor.cast %cast_800 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // The seven index constants below are not referenced in the visible lines.
    %c1_802 = arith.constant 1 : index
    %c1_803 = arith.constant 1 : index
    %c64_804 = arith.constant 64 : index
    %c2_805 = arith.constant 2 : index
    %c56_806 = arith.constant 56 : index
    %c3_807 = arith.constant 3 : index
    %c56_808 = arith.constant 56 : index
    %964 = tensor.empty() : tensor<1x64x56x56xf32>
    %965 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_801 : tensor<1x64x56x56xi8>) outs(%964 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %962
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %960
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    // Cast between identical types; %cast_809 is the input of the tensor.pad that feeds the
    // 1x1 convolution further down.
    %cast_809 = tensor.cast %965 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the 256x64x1x1 f32 tensor %30 (defined above this excerpt) to i8 with scale
    // 3.90625e-03 (= 1/256) and zero point 0. The input is read through #map4, which indexes
    // the two trailing unit dims with the constant 0.
    %966 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %967 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_810 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_810 = torch.constant.int 12
    %968 = torch.aten.item %966 : !torch.vtensor<[],f32> -> !torch.float
    // %969 and %971 are not referenced in the visible lines; the region below converts
    // %968 / %970 again on its own.
    %969 = torch_c.to_f64 %968
    %970 = torch.aten.item %967 : !torch.vtensor<[],si8> -> !torch.int
    %971 = torch_c.to_i64 %970
    // The five index constants below are not referenced in the visible lines.
    %c1_811 = arith.constant 1 : index
    %c0_812 = arith.constant 0 : index
    %c256_813 = arith.constant 256 : index
    %c1_814 = arith.constant 1 : index
    %c64_815 = arith.constant 64 : index
    %972 = tensor.empty() : tensor<256x64x1x1xi8>
    %973 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%30 : tensor<256x64x1x1xf32>) outs(%972 : tensor<256x64x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %970
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %968
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x64x1x1xi8>
    // Both casts are between identical types (no shape change).
    %cast_816 = tensor.cast %973 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %cast_817 = tensor.cast %cast_816 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    // Dequantize %cast_817 (256x64x1x1 i8) back to f32 with scale 3.90625e-03 (= 1/256) and
    // zero point 0.
    %974 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %975 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %976 = torch.aten.item %974 : !torch.vtensor<[],f32> -> !torch.float
    // %977 and %979 are not referenced in the visible lines; the region below converts
    // %976 / %978 again on its own.
    %977 = torch_c.to_f64 %976
    %978 = torch.aten.item %975 : !torch.vtensor<[],si8> -> !torch.int
    %979 = torch_c.to_i64 %978
    // Cast between identical types (no shape change).
    %cast_818 = tensor.cast %cast_817 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    // The five index constants below are not referenced in the visible lines.
    %c1_819 = arith.constant 1 : index
    %c0_820 = arith.constant 0 : index
    %c256_821 = arith.constant 256 : index
    %c1_822 = arith.constant 1 : index
    %c64_823 = arith.constant 64 : index
    %980 = tensor.empty() : tensor<256x64x1x1xf32>
    %981 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_818 : tensor<256x64x1x1xi8>) outs(%980 : tensor<256x64x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %978
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %976
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x64x1x1xf32>
    // Cast between identical types; %cast_824 is the filter operand of the 1x1 convolution
    // further down.
    %cast_824 = tensor.cast %981 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
    // Quantize the 256-element f32 tensor %32 (defined above this excerpt) to i8 with scale
    // 7.8125e-03 (= 1/128) and zero point 0.
    %982 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %983 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_825 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_825 = torch.constant.int 12
    %984 = torch.aten.item %982 : !torch.vtensor<[],f32> -> !torch.float
    // %985 and %987 are not referenced in the visible lines; the region below converts
    // %984 / %986 again on its own.
    %985 = torch_c.to_f64 %984
    %986 = torch.aten.item %983 : !torch.vtensor<[],si8> -> !torch.int
    %987 = torch_c.to_i64 %986
    // The three index constants below are not referenced in the visible lines.
    %c1_826 = arith.constant 1 : index
    %c0_827 = arith.constant 0 : index
    %c256_828 = arith.constant 256 : index
    %988 = tensor.empty() : tensor<256xi8>
    %989 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%32 : tensor<256xf32>) outs(%988 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %986
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %984
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Both casts are between identical types (no shape change).
    %cast_829 = tensor.cast %989 : tensor<256xi8> to tensor<256xi8>
    %cast_830 = tensor.cast %cast_829 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_830 (256 x i8) back to f32 with scale 7.8125e-03 (= 1/128) and zero point 0.
    %990 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %991 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %992 = torch.aten.item %990 : !torch.vtensor<[],f32> -> !torch.float
    // %993 and %995 are not referenced in the visible lines; the region below converts
    // %992 / %994 again on its own.
    %993 = torch_c.to_f64 %992
    %994 = torch.aten.item %991 : !torch.vtensor<[],si8> -> !torch.int
    %995 = torch_c.to_i64 %994
    // Cast between identical types (no shape change).
    %cast_831 = tensor.cast %cast_830 : tensor<256xi8> to tensor<256xi8>
    // The three index constants below are not referenced in the visible lines.
    %c1_832 = arith.constant 1 : index
    %c0_833 = arith.constant 0 : index
    %c256_834 = arith.constant 256 : index
    %996 = tensor.empty() : tensor<256xf32>
    %997 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_831 : tensor<256xi8>) outs(%996 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // (sign-extended input - zero point truncated to i32), converted to f32 ...
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %994
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // ... multiplied by the scale (f64 truncated to f32).
      %5779 = torch_c.to_f64 %992
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    // Cast between identical types; %cast_835 is broadcast into the accumulator of the 1x1
    // convolution further down.
    %cast_835 = tensor.cast %997 : tensor<256xf32> to tensor<256xf32>
    // 1x1 convolution of %cast_809 (1x64x56x56) with filter %cast_824 (256x64x1x1), seeded with
    // the broadcast of %cast_835, producing 1x256x56x56.
    //
    // Scalar parameters. %int0_836 / %int0_837 become the spatial padding (%1003 / %1004) and
    // %int1_844 becomes the group count (%1002 -> %1007). The list values %998-%1001,
    // %false_843, %1005 and %1006 are not referenced in the visible lines.
    %int0_836 = torch.constant.int 0
    %int0_837 = torch.constant.int 0
    %int1_838 = torch.constant.int 1
    %int1_839 = torch.constant.int 1
    %int1_840 = torch.constant.int 1
    %int1_841 = torch.constant.int 1
    %int0_842 = torch.constant.int 0
    %998 = torch.prim.ListConstruct %int0_836, %int0_837 : (!torch.int, !torch.int) -> !torch.list<int>
    %999 = torch.prim.ListConstruct %int1_838, %int1_839 : (!torch.int, !torch.int) -> !torch.list<int>
    %1000 = torch.prim.ListConstruct %int1_840, %int1_841 : (!torch.int, !torch.int) -> !torch.list<int>
    %1001 = torch.prim.ListConstruct %int0_842, %int0_842 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_843 = torch.constant.bool false
    %int1_844 = torch.constant.int 1
    %1002 = torch_c.to_i64 %int1_844
    %1003 = torch_c.to_i64 %int0_836
    %1004 = torch_c.to_i64 %int0_837
    %1005 = torch_c.to_i64 %int0_842
    %1006 = torch_c.to_i64 %int0_842
    // Index constants mirroring the input dims (1, 64, 56, 56) and the filter dims
    // (256, 64, 1, 1). Of these, only %c64_848, %c56_850, %c56_852, %c256_854, %c1_858 and
    // %c1_860 are used below.
    %c0_845 = arith.constant 0 : index
    %c1_846 = arith.constant 1 : index
    %c1_847 = arith.constant 1 : index
    %c64_848 = arith.constant 64 : index
    %c2_849 = arith.constant 2 : index
    %c56_850 = arith.constant 56 : index
    %c3_851 = arith.constant 3 : index
    %c56_852 = arith.constant 56 : index
    %c0_853 = arith.constant 0 : index
    %c256_854 = arith.constant 256 : index
    %c1_855 = arith.constant 1 : index
    %c64_856 = arith.constant 64 : index
    %c2_857 = arith.constant 2 : index
    %c1_858 = arith.constant 1 : index
    %c3_859 = arith.constant 3 : index
    %c1_860 = arith.constant 1 : index
    // Runtime checks: 64 (from %c64_848) and 256 (from %c256_854) must both be divisible by the
    // group count %1007.
    %1007 = arith.index_cast %1002 : i64 to index
    %c0_861 = arith.constant 0 : index
    %1008 = arith.remsi %c64_848, %1007 : index
    %1009 = arith.cmpi eq, %c0_861, %1008 : index
    cf.assert %1009, "invalid: groups must divide input channel size evenly."
    %c0_862 = arith.constant 0 : index
    %1010 = arith.remsi %c256_854, %1007 : index
    %1011 = arith.cmpi eq, %c0_862, %1010 : index
    cf.assert %1011, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_863 / %c1_i64_864 multiply (kernel - 1) and %c1_i64_865 / %c1_i64_866 are the
    // divisors in the output-size arithmetic below (presumably dilation and stride; they match
    // the dense<1> attributes on the conv op -- confirm).
    %c1_i64_863 = arith.constant 1 : i64
    %c1_i64_864 = arith.constant 1 : i64
    %c1_i64_865 = arith.constant 1 : i64
    %c1_i64_866 = arith.constant 1 : i64
    // Padding fill value.
    %cst_867 = arith.constant 0.000000e+00 : f32
    // The eight index constants below are not referenced in the visible lines.
    %c0_868 = arith.constant 0 : index
    %c1_869 = arith.constant 1 : index
    %c1_870 = arith.constant 1 : index
    %c64_871 = arith.constant 64 : index
    %c2_872 = arith.constant 2 : index
    %c56_873 = arith.constant 56 : index
    %c3_874 = arith.constant 3 : index
    %c56_875 = arith.constant 56 : index
    // tensor.pad with all pad amounts derived from zero-valued constants (%c0_i64_876,
    // %int0_836, %int0_837); the result type is still fully dynamic.
    %c0_i64_876 = arith.constant 0 : i64
    %1012 = arith.index_cast %c0_i64_876 : i64 to index
    %1013 = arith.index_cast %c0_i64_876 : i64 to index
    %1014 = arith.index_cast %1003 : i64 to index
    %1015 = arith.index_cast %1004 : i64 to index
    %padded_877 = tensor.pad %cast_809 low[%1012, %1013, %1014, %1015] high[%1012, %1013, %1014, %1015] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_867 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output size for dim 2:
    //   floordiv(56 + 2*%1003 - %c1_i64_863*(1 - 1) - 1, %c1_i64_865) + 1
    // With the constants above this evaluates to 56.
    %1016 = arith.index_cast %c1_858 : index to i64
    %c1_i64_878 = arith.constant 1 : i64
    %c2_i64_879 = arith.constant 2 : i64
    %1017 = arith.muli %1003, %c2_i64_879 : i64
    %1018 = arith.index_cast %c56_850 : index to i64
    %1019 = arith.addi %1018, %1017 : i64
    %1020 = arith.subi %1016, %c1_i64_878 : i64
    %1021 = arith.muli %c1_i64_863, %1020 : i64
    %1022 = arith.subi %1019, %1021 : i64
    %1023 = arith.subi %1022, %c1_i64_878 : i64
    %1024 = arith.floordivsi %1023, %c1_i64_865 : i64
    %1025 = arith.addi %1024, %c1_i64_878 : i64
    %1026 = arith.index_cast %1025 : i64 to index
    // Same computation for dim 3, using %1004, %c1_i64_864 and %c1_i64_866.
    %1027 = arith.index_cast %c1_860 : index to i64
    %c1_i64_880 = arith.constant 1 : i64
    %c2_i64_881 = arith.constant 2 : i64
    %1028 = arith.muli %1004, %c2_i64_881 : i64
    %1029 = arith.index_cast %c56_852 : index to i64
    %1030 = arith.addi %1029, %1028 : i64
    %1031 = arith.subi %1027, %c1_i64_880 : i64
    %1032 = arith.muli %c1_i64_864, %1031 : i64
    %1033 = arith.subi %1030, %1032 : i64
    %1034 = arith.subi %1033, %c1_i64_880 : i64
    %1035 = arith.floordivsi %1034, %c1_i64_866 : i64
    %1036 = arith.addi %1035, %c1_i64_880 : i64
    %1037 = arith.index_cast %1036 : i64 to index
    // Allocate the 1x256x?x? output and fill it by broadcasting %cast_835 along the channel
    // dimension (d1, via #map3); this becomes the initial accumulator of the convolution.
    %1038 = tensor.empty(%1026, %1037) : tensor<1x256x?x?xf32>
    %1039 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_835 : tensor<256xf32>) outs(%1038 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %1040, %1041, %c0_882 and %c1_883 are not referenced in the visible lines.
    %1040 = arith.floordivsi %c64_848, %1007 : index
    %1041 = arith.floordivsi %c256_854, %1007 : index
    %c0_882 = arith.constant 0 : index
    %c1_883 = arith.constant 1 : index
    // Stride-1, dilation-1 NCHW x FCHW convolution accumulating into %1039, then a cast of the
    // dynamic result to the static shape 1x256x56x56.
    %1042 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_877, %cast_824 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%1039 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    %cast_884 = tensor.cast %1042 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
    // Quantize %cast_884 (1x256x56x56 f32) to i8 with scale 7.8125e-03 (= 1/128) and
    // zero point 0.
    %1043 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1044 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_885 is not referenced in the visible lines (presumably a torch dtype code -- confirm).
    %int12_885 = torch.constant.int 12
    %1045 = torch.aten.item %1043 : !torch.vtensor<[],f32> -> !torch.float
    // %1046 and %1048 are not referenced in the visible lines; the region below converts
    // %1045 / %1047 again on its own.
    %1046 = torch_c.to_f64 %1045
    %1047 = torch.aten.item %1044 : !torch.vtensor<[],si8> -> !torch.int
    %1048 = torch_c.to_i64 %1047
    // The seven index constants below are not referenced in the visible lines.
    %c1_886 = arith.constant 1 : index
    %c1_887 = arith.constant 1 : index
    %c256_888 = arith.constant 256 : index
    %c2_889 = arith.constant 2 : index
    %c56_890 = arith.constant 56 : index
    %c3_891 = arith.constant 3 : index
    %c56_892 = arith.constant 56 : index
    %1049 = tensor.empty() : tensor<1x256x56x56xi8>
    %1050 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_884 : tensor<1x256x56x56xf32>) outs(%1049 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale truncated from f64 to f32 (%5777).
      %5774 = torch_c.to_i64 %1047
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1045
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero; if round-half-to-even
      // is the intended quantization rounding, math.roundeven would be required -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127], then convert to signed i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    // Both casts are between identical types (no shape change).
    %cast_893 = tensor.cast %1050 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_894 = tensor.cast %cast_893 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %1051 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1052 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1053 = torch.aten.item %1051 : !torch.vtensor<[],f32> -> !torch.float
    %1054 = torch_c.to_f64 %1053
    %1055 = torch.aten.item %1052 : !torch.vtensor<[],si8> -> !torch.int
    %1056 = torch_c.to_i64 %1055
    %cast_895 = tensor.cast %cast_894 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %c1_896 = arith.constant 1 : index
    %c1_897 = arith.constant 1 : index
    %c256_898 = arith.constant 256 : index
    %c2_899 = arith.constant 2 : index
    %c56_900 = arith.constant 56 : index
    %c3_901 = arith.constant 3 : index
    %c56_902 = arith.constant 56 : index
    %1057 = tensor.empty() : tensor<1x256x56x56xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to %cast_895:
    //   y = (q - zero_point) * scale
    // Zero point is %1055 (item of the dense<0> literal %1052) and scale is
    // %1053 (item of the 7.812500e-03 literal %1051); both live outside the region.
    %1058 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_895 : tensor<1x256x56x56xi8>) outs(%1057 : tensor<1x256x56x56xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1053
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1055
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<1x256x56x56xf32>
    %cast_903 = tensor.cast %1058 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    %int1_904 = torch.constant.int 1
    %1059 = torch_c.to_i64 %int1_904
    %c1_905 = arith.constant 1 : index
    %c1_906 = arith.constant 1 : index
    %c256_907 = arith.constant 256 : index
    %c2_908 = arith.constant 2 : index
    %c56_909 = arith.constant 56 : index
    %c3_910 = arith.constant 3 : index
    %c56_911 = arith.constant 56 : index
    %c1_912 = arith.constant 1 : index
    %c256_913 = arith.constant 256 : index
    %1060 = arith.cmpi eq, %c256_907, %c256_913 : index
    cf.assert %1060, "mismatched size for broadcast"
    %c2_914 = arith.constant 2 : index
    %c56_915 = arith.constant 56 : index
    %1061 = arith.cmpi eq, %c56_909, %c56_915 : index
    cf.assert %1061, "mismatched size for broadcast"
    %c3_916 = arith.constant 3 : index
    %c56_917 = arith.constant 56 : index
    %1062 = arith.cmpi eq, %c56_911, %c56_917 : index
    cf.assert %1062, "mismatched size for broadcast"
    %1063 = tensor.empty() : tensor<1x256x56x56xf32>
    // Elementwise residual add: result = lhs + rhs * alpha, where lhs is
    // %cast_903, rhs is %cast_597 and alpha is %1059 (the i64 form of the
    // torch constant 1 defined just above), converted to f32 per element.
    %1064 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_903, %cast_597 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%1063 : tensor<1x256x56x56xf32>) {
    ^bb0(%lhs: f32, %rhs: f32, %unused_out: f32):
      %alpha_f32 = arith.sitofp %1059 : i64 to f32
      %rhs_scaled = arith.mulf %rhs, %alpha_f32 : f32
      %sum = arith.addf %lhs, %rhs_scaled : f32
      linalg.yield %sum : f32
    } -> tensor<1x256x56x56xf32>
    %cast_918 = tensor.cast %1064 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    %c1_919 = arith.constant 1 : index
    %c1_920 = arith.constant 1 : index
    %c256_921 = arith.constant 256 : index
    %c2_922 = arith.constant 2 : index
    %c56_923 = arith.constant 56 : index
    %c3_924 = arith.constant 3 : index
    %c56_925 = arith.constant 56 : index
    %1065 = tensor.empty() : tensor<1x256x56x56xf32>
    // Elementwise ReLU over %cast_918: keep x when x > 0, otherwise yield 0.0.
    // The comparison uses the unordered predicate ugt, so a NaN input is
    // selected unchanged rather than replaced by zero.
    %1066 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_918 : tensor<1x256x56x56xf32>) outs(%1065 : tensor<1x256x56x56xf32>) {
    ^bb0(%x: f32, %unused_out: f32):
      %relu_floor = arith.constant 0.000000e+00 : f32
      %above_floor = arith.cmpf ugt, %x, %relu_floor : f32
      %activated = arith.select %above_floor, %x, %relu_floor : f32
      linalg.yield %activated : f32
    } -> tensor<1x256x56x56xf32>
    %cast_926 = tensor.cast %1066 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    %1067 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1068 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_927 = torch.constant.int 12
    %1069 = torch.aten.item %1067 : !torch.vtensor<[],f32> -> !torch.float
    %1070 = torch_c.to_f64 %1069
    %1071 = torch.aten.item %1068 : !torch.vtensor<[],si8> -> !torch.int
    %1072 = torch_c.to_i64 %1071
    %c1_928 = arith.constant 1 : index
    %c1_929 = arith.constant 1 : index
    %c256_930 = arith.constant 256 : index
    %c2_931 = arith.constant 2 : index
    %c56_932 = arith.constant 56 : index
    %c3_933 = arith.constant 3 : index
    %c56_934 = arith.constant 56 : index
    %1073 = tensor.empty() : tensor<1x256x56x56xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to %cast_926:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1069 (item of the 1.562500e-02 literal %1067) and the zero
    // point is %1071 (item of the dense<0> literal %1068); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1074 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_926 : tensor<1x256x56x56xf32>) outs(%1073 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1071
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1069
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    %cast_935 = tensor.cast %1074 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_936 = tensor.cast %cast_935 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %1075 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1076 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1077 = torch.aten.item %1075 : !torch.vtensor<[],f32> -> !torch.float
    %1078 = torch_c.to_f64 %1077
    %1079 = torch.aten.item %1076 : !torch.vtensor<[],si8> -> !torch.int
    %1080 = torch_c.to_i64 %1079
    %cast_937 = tensor.cast %cast_936 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %c1_938 = arith.constant 1 : index
    %c1_939 = arith.constant 1 : index
    %c256_940 = arith.constant 256 : index
    %c2_941 = arith.constant 2 : index
    %c56_942 = arith.constant 56 : index
    %c3_943 = arith.constant 3 : index
    %c56_944 = arith.constant 56 : index
    %1081 = tensor.empty() : tensor<1x256x56x56xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to %cast_937:
    //   y = (q - zero_point) * scale
    // Zero point is %1079 (item of the dense<0> literal %1076) and scale is
    // %1077 (item of the 1.562500e-02 literal %1075); both live outside the region.
    %1082 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_937 : tensor<1x256x56x56xi8>) outs(%1081 : tensor<1x256x56x56xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1077
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1079
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<1x256x56x56xf32>
    %cast_945 = tensor.cast %1082 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    %1083 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1084 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_946 = torch.constant.int 12
    %1085 = torch.aten.item %1083 : !torch.vtensor<[],f32> -> !torch.float
    %1086 = torch_c.to_f64 %1085
    %1087 = torch.aten.item %1084 : !torch.vtensor<[],si8> -> !torch.int
    %1088 = torch_c.to_i64 %1087
    %c1_947 = arith.constant 1 : index
    %c0_948 = arith.constant 0 : index
    %c64_949 = arith.constant 64 : index
    %c1_950 = arith.constant 1 : index
    %c256_951 = arith.constant 256 : index
    %1089 = tensor.empty() : tensor<64x256x1x1xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to the
    // 64x256x1x1 tensor %34:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1085 (item of the 3.906250e-03 literal %1083) and the zero
    // point is %1087 (item of the dense<0> literal %1084); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1090 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%34 : tensor<64x256x1x1xf32>) outs(%1089 : tensor<64x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1087
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1085
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x256x1x1xi8>
    %cast_952 = tensor.cast %1090 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    %cast_953 = tensor.cast %cast_952 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    %1091 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1092 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1093 = torch.aten.item %1091 : !torch.vtensor<[],f32> -> !torch.float
    %1094 = torch_c.to_f64 %1093
    %1095 = torch.aten.item %1092 : !torch.vtensor<[],si8> -> !torch.int
    %1096 = torch_c.to_i64 %1095
    %cast_954 = tensor.cast %cast_953 : tensor<64x256x1x1xi8> to tensor<64x256x1x1xi8>
    %c1_955 = arith.constant 1 : index
    %c0_956 = arith.constant 0 : index
    %c64_957 = arith.constant 64 : index
    %c1_958 = arith.constant 1 : index
    %c256_959 = arith.constant 256 : index
    %1097 = tensor.empty() : tensor<64x256x1x1xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to the
    // 64x256x1x1 tensor %cast_954:
    //   y = (q - zero_point) * scale
    // Zero point is %1095 (item of the dense<0> literal %1092) and scale is
    // %1093 (item of the 3.906250e-03 literal %1091); both live outside the region.
    %1098 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_954 : tensor<64x256x1x1xi8>) outs(%1097 : tensor<64x256x1x1xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1093
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1095
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<64x256x1x1xf32>
    %cast_960 = tensor.cast %1098 : tensor<64x256x1x1xf32> to tensor<64x256x1x1xf32>
    %1099 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1100 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_961 = torch.constant.int 12
    %1101 = torch.aten.item %1099 : !torch.vtensor<[],f32> -> !torch.float
    %1102 = torch_c.to_f64 %1101
    %1103 = torch.aten.item %1100 : !torch.vtensor<[],si8> -> !torch.int
    %1104 = torch_c.to_i64 %1103
    %c1_962 = arith.constant 1 : index
    %c0_963 = arith.constant 0 : index
    %c64_964 = arith.constant 64 : index
    %1105 = tensor.empty() : tensor<64xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to the 64-element
    // tensor %36:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1101 (item of the 1.562500e-02 literal %1099) and the zero
    // point is %1103 (item of the dense<0> literal %1100); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1106 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%36 : tensor<64xf32>) outs(%1105 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1103
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1101
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    %cast_965 = tensor.cast %1106 : tensor<64xi8> to tensor<64xi8>
    %cast_966 = tensor.cast %cast_965 : tensor<64xi8> to tensor<64xi8>
    %1107 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1108 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1109 = torch.aten.item %1107 : !torch.vtensor<[],f32> -> !torch.float
    %1110 = torch_c.to_f64 %1109
    %1111 = torch.aten.item %1108 : !torch.vtensor<[],si8> -> !torch.int
    %1112 = torch_c.to_i64 %1111
    %cast_967 = tensor.cast %cast_966 : tensor<64xi8> to tensor<64xi8>
    %c1_968 = arith.constant 1 : index
    %c0_969 = arith.constant 0 : index
    %c64_970 = arith.constant 64 : index
    %1113 = tensor.empty() : tensor<64xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to the
    // 64-element tensor %cast_967:
    //   y = (q - zero_point) * scale
    // Zero point is %1111 (item of the dense<0> literal %1108) and scale is
    // %1109 (item of the 1.562500e-02 literal %1107); both live outside the region.
    %1114 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_967 : tensor<64xi8>) outs(%1113 : tensor<64xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1109
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1111
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<64xf32>
    %cast_971 = tensor.cast %1114 : tensor<64xf32> to tensor<64xf32>
    %int0_972 = torch.constant.int 0
    %int0_973 = torch.constant.int 0
    %int1_974 = torch.constant.int 1
    %int1_975 = torch.constant.int 1
    %int1_976 = torch.constant.int 1
    %int1_977 = torch.constant.int 1
    %int0_978 = torch.constant.int 0
    %1115 = torch.prim.ListConstruct %int0_972, %int0_973 : (!torch.int, !torch.int) -> !torch.list<int>
    %1116 = torch.prim.ListConstruct %int1_974, %int1_975 : (!torch.int, !torch.int) -> !torch.list<int>
    %1117 = torch.prim.ListConstruct %int1_976, %int1_977 : (!torch.int, !torch.int) -> !torch.list<int>
    %1118 = torch.prim.ListConstruct %int0_978, %int0_978 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_979 = torch.constant.bool false
    %int1_980 = torch.constant.int 1
    %1119 = torch_c.to_i64 %int1_980
    %1120 = torch_c.to_i64 %int0_972
    %1121 = torch_c.to_i64 %int0_973
    %1122 = torch_c.to_i64 %int0_978
    %1123 = torch_c.to_i64 %int0_978
    %c0_981 = arith.constant 0 : index
    %c1_982 = arith.constant 1 : index
    %c1_983 = arith.constant 1 : index
    %c256_984 = arith.constant 256 : index
    %c2_985 = arith.constant 2 : index
    %c56_986 = arith.constant 56 : index
    %c3_987 = arith.constant 3 : index
    %c56_988 = arith.constant 56 : index
    %c0_989 = arith.constant 0 : index
    %c64_990 = arith.constant 64 : index
    %c1_991 = arith.constant 1 : index
    %c256_992 = arith.constant 256 : index
    %c2_993 = arith.constant 2 : index
    %c1_994 = arith.constant 1 : index
    %c3_995 = arith.constant 3 : index
    %c1_996 = arith.constant 1 : index
    %1124 = arith.index_cast %1119 : i64 to index
    %c0_997 = arith.constant 0 : index
    %1125 = arith.remsi %c256_984, %1124 : index
    %1126 = arith.cmpi eq, %c0_997, %1125 : index
    cf.assert %1126, "invalid: groups must divide input channel size evenly."
    %c0_998 = arith.constant 0 : index
    %1127 = arith.remsi %c64_990, %1124 : index
    %1128 = arith.cmpi eq, %c0_998, %1127 : index
    cf.assert %1128, "invalid: groups must divide weight batch size evenly."
    %c1_i64_999 = arith.constant 1 : i64
    %c1_i64_1000 = arith.constant 1 : i64
    %c1_i64_1001 = arith.constant 1 : i64
    %c1_i64_1002 = arith.constant 1 : i64
    %cst_1003 = arith.constant 0.000000e+00 : f32
    %c0_1004 = arith.constant 0 : index
    %c1_1005 = arith.constant 1 : index
    %c1_1006 = arith.constant 1 : index
    %c256_1007 = arith.constant 256 : index
    %c2_1008 = arith.constant 2 : index
    %c56_1009 = arith.constant 56 : index
    %c3_1010 = arith.constant 3 : index
    %c56_1011 = arith.constant 56 : index
    %c0_i64_1012 = arith.constant 0 : i64
    %1129 = arith.index_cast %c0_i64_1012 : i64 to index
    %1130 = arith.index_cast %c0_i64_1012 : i64 to index
    %1131 = arith.index_cast %1120 : i64 to index
    %1132 = arith.index_cast %1121 : i64 to index
    // Pad %cast_945 with the f32 constant %cst_1003 (0.0). The low and high
    // amounts are the same four operands: %1129/%1130 are index casts of the
    // i64 constant 0, and %1131/%1132 are index casts of %1120/%1121, which
    // come from the torch constants %int0_972/%int0_973 (both 0). All pad
    // amounts are therefore zero here, but because they are SSA operands the
    // result type is fully dynamic (tensor<?x?x?x?xf32>).
    %padded_1013 = tensor.pad %cast_945 low[%1129, %1130, %1131, %1132] high[%1129, %1130, %1131, %1132] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      // Every padded position receives the same constant, independent of index.
      tensor.yield %cst_1003 : f32
    } : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
    // Compute the two dynamic spatial extents of the convolution result, then
    // allocate the 1x64x?x? output. Each extent is evaluated in i64 as
    //   floordiv(in + 2*pad - d*(k - 1) - 1, s) + 1
    // which has the shape of the usual convolution output-size formula;
    // presumably d is the dilation and s the stride (both are the constant 1
    // here), to be confirmed against the producer of this IR.
    //
    // First extent: k = %c1_994 (1), in = %c56_986 (56), pad = %1120 (0),
    // d = %c1_i64_999, s = %c1_i64_1001  ->  (56 + 0 - 0 - 1) / 1 + 1 = 56.
    %1133 = arith.index_cast %c1_994 : index to i64
    %c1_i64_1014 = arith.constant 1 : i64
    %c2_i64_1015 = arith.constant 2 : i64
    %1134 = arith.muli %1120, %c2_i64_1015 : i64
    %1135 = arith.index_cast %c56_986 : index to i64
    %1136 = arith.addi %1135, %1134 : i64
    %1137 = arith.subi %1133, %c1_i64_1014 : i64
    %1138 = arith.muli %c1_i64_999, %1137 : i64
    %1139 = arith.subi %1136, %1138 : i64
    %1140 = arith.subi %1139, %c1_i64_1014 : i64
    %1141 = arith.floordivsi %1140, %c1_i64_1001 : i64
    %1142 = arith.addi %1141, %c1_i64_1014 : i64
    %1143 = arith.index_cast %1142 : i64 to index
    // Second extent: same computation with k = %c1_996 (1), in = %c56_988 (56),
    // pad = %1121 (0), d = %c1_i64_1000, s = %c1_i64_1002  ->  56.
    %1144 = arith.index_cast %c1_996 : index to i64
    %c1_i64_1016 = arith.constant 1 : i64
    %c2_i64_1017 = arith.constant 2 : i64
    %1145 = arith.muli %1121, %c2_i64_1017 : i64
    %1146 = arith.index_cast %c56_988 : index to i64
    %1147 = arith.addi %1146, %1145 : i64
    %1148 = arith.subi %1144, %c1_i64_1016 : i64
    %1149 = arith.muli %c1_i64_1000, %1148 : i64
    %1150 = arith.subi %1147, %1149 : i64
    %1151 = arith.subi %1150, %c1_i64_1016 : i64
    %1152 = arith.floordivsi %1151, %c1_i64_1002 : i64
    %1153 = arith.addi %1152, %c1_i64_1016 : i64
    %1154 = arith.index_cast %1153 : i64 to index
    // Uninitialized output buffer whose two trailing dims are the extents above.
    %1155 = tensor.empty(%1143, %1154) : tensor<1x64x?x?xf32>
    // Broadcast the 64-element f32 vector %cast_971 into the 1x64x?x? buffer
    // %1155: the input map #map3 reads only d1, so every (d0, d2, d3) position
    // of channel d1 receives the same value. The result is later passed as the
    // `outs` operand of the convolution, i.e. it seeds the accumulator.
    %1156 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_971 : tensor<64xf32>) outs(%1155 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      // Pure copy; the previous contents of the output are ignored.
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    %1157 = arith.floordivsi %c256_984, %1124 : index
    %1158 = arith.floordivsi %c64_990, %1124 : index
    %c0_1018 = arith.constant 0 : index
    %c1_1019 = arith.constant 1 : index
    // 2-D convolution (NCHW input, FCHW filter) with strides and dilations of 1.
    // Input is the padded activation %padded_1013, the filter is the dequantized
    // 64x256x1x1 tensor %cast_960, and the accumulator is %1156, which was
    // pre-filled with the broadcast 64-element vector %cast_971.
    %1159 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1013, %cast_960 : tensor<?x?x?x?xf32>, tensor<64x256x1x1xf32>) outs(%1156 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Reassert the static result shape that the dynamic size arithmetic erased.
    %cast_1020 = tensor.cast %1159 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    %c1_1021 = arith.constant 1 : index
    %c1_1022 = arith.constant 1 : index
    %c64_1023 = arith.constant 64 : index
    %c2_1024 = arith.constant 2 : index
    %c56_1025 = arith.constant 56 : index
    %c3_1026 = arith.constant 3 : index
    %c56_1027 = arith.constant 56 : index
    %1160 = tensor.empty() : tensor<1x64x56x56xf32>
    // Elementwise ReLU over %cast_1020: keep x when x > 0, otherwise yield 0.0.
    // The comparison uses the unordered predicate ugt, so a NaN input is
    // selected unchanged rather than replaced by zero.
    %1161 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1020 : tensor<1x64x56x56xf32>) outs(%1160 : tensor<1x64x56x56xf32>) {
    ^bb0(%x: f32, %unused_out: f32):
      %relu_floor = arith.constant 0.000000e+00 : f32
      %above_floor = arith.cmpf ugt, %x, %relu_floor : f32
      %activated = arith.select %above_floor, %x, %relu_floor : f32
      linalg.yield %activated : f32
    } -> tensor<1x64x56x56xf32>
    %cast_1028 = tensor.cast %1161 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    %1162 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1163 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_1029 = torch.constant.int 12
    %1164 = torch.aten.item %1162 : !torch.vtensor<[],f32> -> !torch.float
    %1165 = torch_c.to_f64 %1164
    %1166 = torch.aten.item %1163 : !torch.vtensor<[],si8> -> !torch.int
    %1167 = torch_c.to_i64 %1166
    %c1_1030 = arith.constant 1 : index
    %c1_1031 = arith.constant 1 : index
    %c64_1032 = arith.constant 64 : index
    %c2_1033 = arith.constant 2 : index
    %c56_1034 = arith.constant 56 : index
    %c3_1035 = arith.constant 3 : index
    %c56_1036 = arith.constant 56 : index
    %1168 = tensor.empty() : tensor<1x64x56x56xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to %cast_1028:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1164 (item of the 7.812500e-03 literal %1162) and the zero
    // point is %1166 (item of the dense<0> literal %1163); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1169 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1028 : tensor<1x64x56x56xf32>) outs(%1168 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1166
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1164
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    %cast_1037 = tensor.cast %1169 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_1038 = tensor.cast %cast_1037 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %1170 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1171 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1172 = torch.aten.item %1170 : !torch.vtensor<[],f32> -> !torch.float
    %1173 = torch_c.to_f64 %1172
    %1174 = torch.aten.item %1171 : !torch.vtensor<[],si8> -> !torch.int
    %1175 = torch_c.to_i64 %1174
    %cast_1039 = tensor.cast %cast_1038 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %c1_1040 = arith.constant 1 : index
    %c1_1041 = arith.constant 1 : index
    %c64_1042 = arith.constant 64 : index
    %c2_1043 = arith.constant 2 : index
    %c56_1044 = arith.constant 56 : index
    %c3_1045 = arith.constant 3 : index
    %c56_1046 = arith.constant 56 : index
    %1176 = tensor.empty() : tensor<1x64x56x56xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to %cast_1039:
    //   y = (q - zero_point) * scale
    // Zero point is %1174 (item of the dense<0> literal %1171) and scale is
    // %1172 (item of the 7.812500e-03 literal %1170); both live outside the region.
    %1177 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1039 : tensor<1x64x56x56xi8>) outs(%1176 : tensor<1x64x56x56xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1172
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1174
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<1x64x56x56xf32>
    %cast_1047 = tensor.cast %1177 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    %1178 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1179 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_1048 = torch.constant.int 12
    %1180 = torch.aten.item %1178 : !torch.vtensor<[],f32> -> !torch.float
    %1181 = torch_c.to_f64 %1180
    %1182 = torch.aten.item %1179 : !torch.vtensor<[],si8> -> !torch.int
    %1183 = torch_c.to_i64 %1182
    %c1_1049 = arith.constant 1 : index
    %c0_1050 = arith.constant 0 : index
    %c64_1051 = arith.constant 64 : index
    %c1_1052 = arith.constant 1 : index
    %c64_1053 = arith.constant 64 : index
    %c2_1054 = arith.constant 2 : index
    %c3_1055 = arith.constant 3 : index
    %c3_1056 = arith.constant 3 : index
    %c3_1057 = arith.constant 3 : index
    %1184 = tensor.empty() : tensor<64x64x3x3xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to the
    // 64x64x3x3 tensor %38:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1180 (item of the 3.906250e-03 literal %1178) and the zero
    // point is %1182 (item of the dense<0> literal %1179); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1185 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%38 : tensor<64x64x3x3xf32>) outs(%1184 : tensor<64x64x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1182
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1180
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64x64x3x3xi8>
    %cast_1058 = tensor.cast %1185 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %cast_1059 = tensor.cast %cast_1058 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %1186 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1187 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1188 = torch.aten.item %1186 : !torch.vtensor<[],f32> -> !torch.float
    %1189 = torch_c.to_f64 %1188
    %1190 = torch.aten.item %1187 : !torch.vtensor<[],si8> -> !torch.int
    %1191 = torch_c.to_i64 %1190
    %cast_1060 = tensor.cast %cast_1059 : tensor<64x64x3x3xi8> to tensor<64x64x3x3xi8>
    %c1_1061 = arith.constant 1 : index
    %c0_1062 = arith.constant 0 : index
    %c64_1063 = arith.constant 64 : index
    %c1_1064 = arith.constant 1 : index
    %c64_1065 = arith.constant 64 : index
    %c2_1066 = arith.constant 2 : index
    %c3_1067 = arith.constant 3 : index
    %c3_1068 = arith.constant 3 : index
    %c3_1069 = arith.constant 3 : index
    %1192 = tensor.empty() : tensor<64x64x3x3xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to the
    // 64x64x3x3 tensor %cast_1060:
    //   y = (q - zero_point) * scale
    // Zero point is %1190 (item of the dense<0> literal %1187) and scale is
    // %1188 (item of the 3.906250e-03 literal %1186); both live outside the region.
    %1193 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1060 : tensor<64x64x3x3xi8>) outs(%1192 : tensor<64x64x3x3xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1188
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1190
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<64x64x3x3xf32>
    %cast_1070 = tensor.cast %1193 : tensor<64x64x3x3xf32> to tensor<64x64x3x3xf32>
    %1194 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1195 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_1071 = torch.constant.int 12
    %1196 = torch.aten.item %1194 : !torch.vtensor<[],f32> -> !torch.float
    %1197 = torch_c.to_f64 %1196
    %1198 = torch.aten.item %1195 : !torch.vtensor<[],si8> -> !torch.int
    %1199 = torch_c.to_i64 %1198
    %c1_1072 = arith.constant 1 : index
    %c0_1073 = arith.constant 0 : index
    %c64_1074 = arith.constant 64 : index
    %1200 = tensor.empty() : tensor<64xi8>
    // Per-tensor quantization f32 -> i8, applied elementwise to the 64-element
    // tensor %40:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // The scale is %1196 (item of the 7.812500e-03 literal %1194) and the zero
    // point is %1198 (item of the dense<0> literal %1195); both are defined
    // outside the region.
    // Rounding uses math.roundeven (ties to even) rather than math.round (ties
    // away from zero): the module's torch.onnx_meta attributes indicate an ONNX
    // import, and ONNX QuantizeLinear specifies round-to-nearest-even.
    %1201 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%40 : tensor<64xf32>) outs(%1200 : tensor<64xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point: !torch.int -> i64 -> f32.
      %5774 = torch_c.to_i64 %1198
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale: !torch.float -> f64 -> f32.
      %5776 = torch_c.to_f64 %1196
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties go to the nearest even integer.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      // ult/ugt are unordered predicates (true on NaN), so a NaN ends up at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<64xi8>
    %cast_1075 = tensor.cast %1201 : tensor<64xi8> to tensor<64xi8>
    %cast_1076 = tensor.cast %cast_1075 : tensor<64xi8> to tensor<64xi8>
    %1202 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1203 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1204 = torch.aten.item %1202 : !torch.vtensor<[],f32> -> !torch.float
    %1205 = torch_c.to_f64 %1204
    %1206 = torch.aten.item %1203 : !torch.vtensor<[],si8> -> !torch.int
    %1207 = torch_c.to_i64 %1206
    %cast_1077 = tensor.cast %cast_1076 : tensor<64xi8> to tensor<64xi8>
    %c1_1078 = arith.constant 1 : index
    %c0_1079 = arith.constant 0 : index
    %c64_1080 = arith.constant 64 : index
    %1208 = tensor.empty() : tensor<64xf32>
    // Per-tensor dequantization i8 -> f32, applied elementwise to the
    // 64-element tensor %cast_1077:
    //   y = (q - zero_point) * scale
    // Zero point is %1206 (item of the dense<0> literal %1203) and scale is
    // %1204 (item of the 7.812500e-03 literal %1202); both live outside the region.
    %1209 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1077 : tensor<64xi8>) outs(%1208 : tensor<64xf32>) {
    ^bb0(%q: i8, %unused_out: f32):
      // Scale: !torch.float -> f64 -> f32.
      %scale_f64 = torch_c.to_f64 %1204
      %scale_f32 = arith.truncf %scale_f64 : f64 to f32
      // Zero point: !torch.int -> i64 -> i32.
      %zp_i64 = torch_c.to_i64 %1206
      %zp_i32 = arith.trunci %zp_i64 : i64 to i32
      // Widen the sample, remove the zero point in i32, then scale in f32.
      %q_i32 = arith.extsi %q : i8 to i32
      %centered_i32 = arith.subi %q_i32, %zp_i32 : i32
      %centered_f32 = arith.sitofp %centered_i32 : i32 to f32
      %dequant = arith.mulf %centered_f32, %scale_f32 : f32
      linalg.yield %dequant : f32
    } -> tensor<64xf32>
    %cast_1081 = tensor.cast %1209 : tensor<64xf32> to tensor<64xf32>
    %int1_1082 = torch.constant.int 1
    %int1_1083 = torch.constant.int 1
    %int1_1084 = torch.constant.int 1
    %int1_1085 = torch.constant.int 1
    %int1_1086 = torch.constant.int 1
    %int1_1087 = torch.constant.int 1
    %int0_1088 = torch.constant.int 0
    %1210 = torch.prim.ListConstruct %int1_1082, %int1_1083 : (!torch.int, !torch.int) -> !torch.list<int>
    %1211 = torch.prim.ListConstruct %int1_1084, %int1_1085 : (!torch.int, !torch.int) -> !torch.list<int>
    %1212 = torch.prim.ListConstruct %int1_1086, %int1_1087 : (!torch.int, !torch.int) -> !torch.list<int>
    %1213 = torch.prim.ListConstruct %int0_1088, %int0_1088 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1089 = torch.constant.bool false
    // Lowered 3x3 convolution, 64 -> 64 channels, applied to %cast_1047 (1x64x56x56).
    // %int1_1082, %int1_1083, %int0_1088, %cast_1047, %cast_1070 and %cast_1081
    // are defined earlier in the function, outside this section.
    // %1214 is the group count (constant 1) checked by the two asserts below.
    %int1_1090 = torch.constant.int 1
    %1214 = torch_c.to_i64 %int1_1090
    // %1215 / %1216 are used as the low and high padding of dims 2 and 3.
    %1215 = torch_c.to_i64 %int1_1082
    %1216 = torch_c.to_i64 %int1_1083
    // %1217 / %1218 are not consumed by any op in this section.
    %1217 = torch_c.to_i64 %int0_1088
    %1218 = torch_c.to_i64 %int0_1088
    // Index constants emitted by the lowering. Within this section only
    // %c64_1094, %c56_1096, %c56_1098, %c64_1100, %c3_1104 and %c3_1106 are read.
    %c0_1091 = arith.constant 0 : index
    %c1_1092 = arith.constant 1 : index
    %c1_1093 = arith.constant 1 : index
    %c64_1094 = arith.constant 64 : index
    %c2_1095 = arith.constant 2 : index
    %c56_1096 = arith.constant 56 : index
    %c3_1097 = arith.constant 3 : index
    %c56_1098 = arith.constant 56 : index
    %c0_1099 = arith.constant 0 : index
    %c64_1100 = arith.constant 64 : index
    %c1_1101 = arith.constant 1 : index
    %c64_1102 = arith.constant 64 : index
    %c2_1103 = arith.constant 2 : index
    %c3_1104 = arith.constant 3 : index
    %c3_1105 = arith.constant 3 : index
    %c3_1106 = arith.constant 3 : index
    // Runtime checks: 64 mod groups == 0 for both the input channel count and
    // the weight batch (output channel) count.
    %1219 = arith.index_cast %1214 : i64 to index
    %c0_1107 = arith.constant 0 : index
    %1220 = arith.remsi %c64_1094, %1219 : index
    %1221 = arith.cmpi eq, %c0_1107, %1220 : index
    cf.assert %1221, "invalid: groups must divide input channel size evenly."
    %c0_1108 = arith.constant 0 : index
    %1222 = arith.remsi %c64_1100, %1219 : index
    %1223 = arith.cmpi eq, %c0_1108, %1222 : index
    cf.assert %1223, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are the multipliers (%c1_i64_1109/_1110) and divisors
    // (%c1_i64_1111/_1112) of the output-size arithmetic below; they agree with
    // the dilations = 1 / strides = 1 attributes on the conv op.
    %c1_i64_1109 = arith.constant 1 : i64
    %c1_i64_1110 = arith.constant 1 : i64
    %c1_i64_1111 = arith.constant 1 : i64
    %c1_i64_1112 = arith.constant 1 : i64
    // Value written into the padded border.
    %cst_1113 = arith.constant 0.000000e+00 : f32
    %c0_1114 = arith.constant 0 : index
    %c1_1115 = arith.constant 1 : index
    %c1_1116 = arith.constant 1 : index
    %c64_1117 = arith.constant 64 : index
    %c2_1118 = arith.constant 2 : index
    %c56_1119 = arith.constant 56 : index
    %c3_1120 = arith.constant 3 : index
    %c56_1121 = arith.constant 56 : index
    %c0_i64_1122 = arith.constant 0 : i64
    // Padding amounts: 0 for dims 0 and 1, %1215 / %1216 for dims 2 and 3,
    // applied identically on the low and high side.
    %1224 = arith.index_cast %c0_i64_1122 : i64 to index
    %1225 = arith.index_cast %c0_i64_1122 : i64 to index
    %1226 = arith.index_cast %1215 : i64 to index
    %1227 = arith.index_cast %1216 : i64 to index
    %padded_1123 = tensor.pad %cast_1047 low[%1224, %1225, %1226, %1227] high[%1224, %1225, %1226, %1227] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1113 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent of dim 2:
    //   %1238 = floordiv(56 + 2 * %1215 - 1 * (3 - 1) - 1, 1) + 1
    %1228 = arith.index_cast %c3_1104 : index to i64
    %c1_i64_1124 = arith.constant 1 : i64
    %c2_i64_1125 = arith.constant 2 : i64
    %1229 = arith.muli %1215, %c2_i64_1125 : i64
    %1230 = arith.index_cast %c56_1096 : index to i64
    %1231 = arith.addi %1230, %1229 : i64
    %1232 = arith.subi %1228, %c1_i64_1124 : i64
    %1233 = arith.muli %c1_i64_1109, %1232 : i64
    %1234 = arith.subi %1231, %1233 : i64
    %1235 = arith.subi %1234, %c1_i64_1124 : i64
    %1236 = arith.floordivsi %1235, %c1_i64_1111 : i64
    %1237 = arith.addi %1236, %c1_i64_1124 : i64
    %1238 = arith.index_cast %1237 : i64 to index
    // Output extent of dim 3, same formula with %1216 as the padding.
    %1239 = arith.index_cast %c3_1106 : index to i64
    %c1_i64_1126 = arith.constant 1 : i64
    %c2_i64_1127 = arith.constant 2 : i64
    %1240 = arith.muli %1216, %c2_i64_1127 : i64
    %1241 = arith.index_cast %c56_1098 : index to i64
    %1242 = arith.addi %1241, %1240 : i64
    %1243 = arith.subi %1239, %c1_i64_1126 : i64
    %1244 = arith.muli %c1_i64_1110, %1243 : i64
    %1245 = arith.subi %1242, %1244 : i64
    %1246 = arith.subi %1245, %c1_i64_1126 : i64
    %1247 = arith.floordivsi %1246, %c1_i64_1112 : i64
    %1248 = arith.addi %1247, %c1_i64_1126 : i64
    %1249 = arith.index_cast %1248 : i64 to index
    // Broadcast the 64-element vector %cast_1081 along dim 1 (#map3) into the
    // output buffer. The conv accumulates into this tensor, so the vector acts
    // as a per-output-channel additive term.
    %1250 = tensor.empty(%1238, %1249) : tensor<1x64x?x?xf32>
    %1251 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1081 : tensor<64xf32>) outs(%1250 : tensor<1x64x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x64x?x?xf32>
    // Channels-per-group values; neither result is consumed in this section.
    %1252 = arith.floordivsi %c64_1094, %1219 : index
    %1253 = arith.floordivsi %c64_1100, %1219 : index
    %c0_1128 = arith.constant 0 : index
    %c1_1129 = arith.constant 1 : index
    // The convolution itself: NCHW input, FCHW weights (%cast_1070, 64x64x3x3).
    %1254 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1123, %cast_1070 : tensor<?x?x?x?xf32>, tensor<64x64x3x3xf32>) outs(%1251 : tensor<1x64x?x?xf32>) -> tensor<1x64x?x?xf32>
    // Re-attach the static result shape.
    %cast_1130 = tensor.cast %1254 : tensor<1x64x?x?xf32> to tensor<1x64x56x56xf32>
    // Elementwise select(x > 0, x, 0) over the conv result %cast_1130.
    // The index constants below are not read by any op in this section.
    %c1_1131 = arith.constant 1 : index
    %c1_1132 = arith.constant 1 : index
    %c64_1133 = arith.constant 64 : index
    %c2_1134 = arith.constant 2 : index
    %c56_1135 = arith.constant 56 : index
    %c3_1136 = arith.constant 3 : index
    %c56_1137 = arith.constant 56 : index
    %1255 = tensor.empty() : tensor<1x64x56x56xf32>
    %1256 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1130 : tensor<1x64x56x56xf32>) outs(%1255 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is an unordered predicate: it is also true for NaN, so a NaN
      // input is selected and passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x64x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1138 = tensor.cast %1256 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the activation %cast_1138 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0.
    %1257 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1258 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1139 = torch.constant.int 12
    // Scalar scale (%1259) and zero point (%1261). %1260 and %1262 are not read
    // below; the region body converts %1259 / %1261 again itself.
    %1259 = torch.aten.item %1257 : !torch.vtensor<[],f32> -> !torch.float
    %1260 = torch_c.to_f64 %1259
    %1261 = torch.aten.item %1258 : !torch.vtensor<[],si8> -> !torch.int
    %1262 = torch_c.to_i64 %1261
    %c1_1140 = arith.constant 1 : index
    %c1_1141 = arith.constant 1 : index
    %c64_1142 = arith.constant 64 : index
    %c2_1143 = arith.constant 2 : index
    %c56_1144 = arith.constant 56 : index
    %c3_1145 = arith.constant 3 : index
    %c56_1146 = arith.constant 56 : index
    %1263 = tensor.empty() : tensor<1x64x56x56xi8>
    %1264 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1138 : tensor<1x64x56x56xf32>) outs(%1263 : tensor<1x64x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1261
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1259
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero, and with a power-of-two
      // scale exact .5 ties do occur.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x64x56x56xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1147 = tensor.cast %1264 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %cast_1148 = tensor.cast %cast_1147 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    // Dequantize %cast_1148 back to f32: x = (q - zero_point) * scale,
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0.
    %1265 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1266 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1268 and %1270 are not read below; the region body converts
    // %1267 / %1269 again itself.
    %1267 = torch.aten.item %1265 : !torch.vtensor<[],f32> -> !torch.float
    %1268 = torch_c.to_f64 %1267
    %1269 = torch.aten.item %1266 : !torch.vtensor<[],si8> -> !torch.int
    %1270 = torch_c.to_i64 %1269
    %cast_1149 = tensor.cast %cast_1148 : tensor<1x64x56x56xi8> to tensor<1x64x56x56xi8>
    %c1_1150 = arith.constant 1 : index
    %c1_1151 = arith.constant 1 : index
    %c64_1152 = arith.constant 64 : index
    %c2_1153 = arith.constant 2 : index
    %c56_1154 = arith.constant 56 : index
    %c3_1155 = arith.constant 3 : index
    %c56_1156 = arith.constant 56 : index
    %1271 = tensor.empty() : tensor<1x64x56x56xf32>
    %1272 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1149 : tensor<1x64x56x56xi8>) outs(%1271 : tensor<1x64x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1269
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1267
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x64x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1157 = tensor.cast %1272 : tensor<1x64x56x56xf32> to tensor<1x64x56x56xf32>
    // Quantize the 256x64x1x1 f32 tensor %42 (defined earlier in the function)
    // to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0.
    %1273 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1274 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1158 = torch.constant.int 12
    // %1276 and %1278 are not read below; the region body converts
    // %1275 / %1277 again itself.
    %1275 = torch.aten.item %1273 : !torch.vtensor<[],f32> -> !torch.float
    %1276 = torch_c.to_f64 %1275
    %1277 = torch.aten.item %1274 : !torch.vtensor<[],si8> -> !torch.int
    %1278 = torch_c.to_i64 %1277
    %c1_1159 = arith.constant 1 : index
    %c0_1160 = arith.constant 0 : index
    %c256_1161 = arith.constant 256 : index
    %c1_1162 = arith.constant 1 : index
    %c64_1163 = arith.constant 64 : index
    %1279 = tensor.empty() : tensor<256x64x1x1xi8>
    %1280 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%42 : tensor<256x64x1x1xf32>) outs(%1279 : tensor<256x64x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1277
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1275
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x64x1x1xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1164 = tensor.cast %1280 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %cast_1165 = tensor.cast %cast_1164 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    // Dequantize the 256x64x1x1 i8 tensor %cast_1165 back to f32:
    //   x = (q - zero_point) * scale, scale = 3.90625e-03 (2^-8), zero_point = 0.
    // The result (%cast_1172) is the weight operand of the 1x1 convolution below.
    %1281 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1282 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1284 and %1286 are not read below; the region body converts
    // %1283 / %1285 again itself.
    %1283 = torch.aten.item %1281 : !torch.vtensor<[],f32> -> !torch.float
    %1284 = torch_c.to_f64 %1283
    %1285 = torch.aten.item %1282 : !torch.vtensor<[],si8> -> !torch.int
    %1286 = torch_c.to_i64 %1285
    %cast_1166 = tensor.cast %cast_1165 : tensor<256x64x1x1xi8> to tensor<256x64x1x1xi8>
    %c1_1167 = arith.constant 1 : index
    %c0_1168 = arith.constant 0 : index
    %c256_1169 = arith.constant 256 : index
    %c1_1170 = arith.constant 1 : index
    %c64_1171 = arith.constant 64 : index
    %1287 = tensor.empty() : tensor<256x64x1x1xf32>
    %1288 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1166 : tensor<256x64x1x1xi8>) outs(%1287 : tensor<256x64x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1285
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1283
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x64x1x1xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1172 = tensor.cast %1288 : tensor<256x64x1x1xf32> to tensor<256x64x1x1xf32>
    // Quantize the 256-element f32 vector %44 (defined earlier in the function)
    // to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0.
    %1289 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1290 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1173 = torch.constant.int 12
    // %1292 and %1294 are not read below; the region body converts
    // %1291 / %1293 again itself.
    %1291 = torch.aten.item %1289 : !torch.vtensor<[],f32> -> !torch.float
    %1292 = torch_c.to_f64 %1291
    %1293 = torch.aten.item %1290 : !torch.vtensor<[],si8> -> !torch.int
    %1294 = torch_c.to_i64 %1293
    %c1_1174 = arith.constant 1 : index
    %c0_1175 = arith.constant 0 : index
    %c256_1176 = arith.constant 256 : index
    %1295 = tensor.empty() : tensor<256xi8>
    %1296 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%44 : tensor<256xf32>) outs(%1295 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1293
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1291
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1177 = tensor.cast %1296 : tensor<256xi8> to tensor<256xi8>
    %cast_1178 = tensor.cast %cast_1177 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the 256-element i8 vector %cast_1178 back to f32:
    //   x = (q - zero_point) * scale, scale = 7.8125e-03 (2^-7), zero_point = 0.
    // The result (%cast_1183) is broadcast into the conv output buffer below.
    %1297 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1298 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1300 and %1302 are not read below; the region body converts
    // %1299 / %1301 again itself.
    %1299 = torch.aten.item %1297 : !torch.vtensor<[],f32> -> !torch.float
    %1300 = torch_c.to_f64 %1299
    %1301 = torch.aten.item %1298 : !torch.vtensor<[],si8> -> !torch.int
    %1302 = torch_c.to_i64 %1301
    %cast_1179 = tensor.cast %cast_1178 : tensor<256xi8> to tensor<256xi8>
    %c1_1180 = arith.constant 1 : index
    %c0_1181 = arith.constant 0 : index
    %c256_1182 = arith.constant 256 : index
    %1303 = tensor.empty() : tensor<256xf32>
    %1304 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1179 : tensor<256xi8>) outs(%1303 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1301
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1299
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1183 = tensor.cast %1304 : tensor<256xf32> to tensor<256xf32>
    // Lowered 1x1 convolution, 64 -> 256 channels, applied to the dequantized
    // activation %cast_1157 with weights %cast_1172 and the per-channel vector
    // %cast_1183.
    %int0_1184 = torch.constant.int 0
    %int0_1185 = torch.constant.int 0
    %int1_1186 = torch.constant.int 1
    %int1_1187 = torch.constant.int 1
    %int1_1188 = torch.constant.int 1
    %int1_1189 = torch.constant.int 1
    %int0_1190 = torch.constant.int 0
    // Torch-level integer lists and flag left over from the original op. No op
    // in this section consumes %1305-%1308 or %false_1191; the lowering reads
    // the scalar constants directly instead.
    %1305 = torch.prim.ListConstruct %int0_1184, %int0_1185 : (!torch.int, !torch.int) -> !torch.list<int>
    %1306 = torch.prim.ListConstruct %int1_1186, %int1_1187 : (!torch.int, !torch.int) -> !torch.list<int>
    %1307 = torch.prim.ListConstruct %int1_1188, %int1_1189 : (!torch.int, !torch.int) -> !torch.list<int>
    %1308 = torch.prim.ListConstruct %int0_1190, %int0_1190 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1191 = torch.constant.bool false
    // %1309 is the group count (constant 1) checked by the two asserts below.
    %int1_1192 = torch.constant.int 1
    %1309 = torch_c.to_i64 %int1_1192
    // %1310 / %1311 (both 0) are used as the low and high padding of dims 2 and 3.
    %1310 = torch_c.to_i64 %int0_1184
    %1311 = torch_c.to_i64 %int0_1185
    // %1312 / %1313 are not consumed by any op in this section.
    %1312 = torch_c.to_i64 %int0_1190
    %1313 = torch_c.to_i64 %int0_1190
    // Index constants emitted by the lowering. Within this section only
    // %c64_1196, %c56_1198, %c56_1200, %c256_1202, %c1_1206 and %c1_1208 are read.
    %c0_1193 = arith.constant 0 : index
    %c1_1194 = arith.constant 1 : index
    %c1_1195 = arith.constant 1 : index
    %c64_1196 = arith.constant 64 : index
    %c2_1197 = arith.constant 2 : index
    %c56_1198 = arith.constant 56 : index
    %c3_1199 = arith.constant 3 : index
    %c56_1200 = arith.constant 56 : index
    %c0_1201 = arith.constant 0 : index
    %c256_1202 = arith.constant 256 : index
    %c1_1203 = arith.constant 1 : index
    %c64_1204 = arith.constant 64 : index
    %c2_1205 = arith.constant 2 : index
    %c1_1206 = arith.constant 1 : index
    %c3_1207 = arith.constant 3 : index
    %c1_1208 = arith.constant 1 : index
    // Runtime checks: 64 mod groups == 0 (input channels) and
    // 256 mod groups == 0 (weight batch / output channels).
    %1314 = arith.index_cast %1309 : i64 to index
    %c0_1209 = arith.constant 0 : index
    %1315 = arith.remsi %c64_1196, %1314 : index
    %1316 = arith.cmpi eq, %c0_1209, %1315 : index
    cf.assert %1316, "invalid: groups must divide input channel size evenly."
    %c0_1210 = arith.constant 0 : index
    %1317 = arith.remsi %c256_1202, %1314 : index
    %1318 = arith.cmpi eq, %c0_1210, %1317 : index
    cf.assert %1318, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are the multipliers (%c1_i64_1211/_1212) and divisors
    // (%c1_i64_1213/_1214) of the output-size arithmetic below; they agree with
    // the dilations = 1 / strides = 1 attributes on the conv op.
    %c1_i64_1211 = arith.constant 1 : i64
    %c1_i64_1212 = arith.constant 1 : i64
    %c1_i64_1213 = arith.constant 1 : i64
    %c1_i64_1214 = arith.constant 1 : i64
    // Value written into the padded border.
    %cst_1215 = arith.constant 0.000000e+00 : f32
    %c0_1216 = arith.constant 0 : index
    %c1_1217 = arith.constant 1 : index
    %c1_1218 = arith.constant 1 : index
    %c64_1219 = arith.constant 64 : index
    %c2_1220 = arith.constant 2 : index
    %c56_1221 = arith.constant 56 : index
    %c3_1222 = arith.constant 3 : index
    %c56_1223 = arith.constant 56 : index
    %c0_i64_1224 = arith.constant 0 : i64
    // All padding amounts are 0 here, so the pad adds no elements; it still
    // erases the static shape to tensor<?x?x?x?xf32>.
    %1319 = arith.index_cast %c0_i64_1224 : i64 to index
    %1320 = arith.index_cast %c0_i64_1224 : i64 to index
    %1321 = arith.index_cast %1310 : i64 to index
    %1322 = arith.index_cast %1311 : i64 to index
    %padded_1225 = tensor.pad %cast_1157 low[%1319, %1320, %1321, %1322] high[%1319, %1320, %1321, %1322] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1215 : f32
    } : tensor<1x64x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent of dim 2:
    //   %1333 = floordiv(56 + 2 * 0 - 1 * (1 - 1) - 1, 1) + 1
    %1323 = arith.index_cast %c1_1206 : index to i64
    %c1_i64_1226 = arith.constant 1 : i64
    %c2_i64_1227 = arith.constant 2 : i64
    %1324 = arith.muli %1310, %c2_i64_1227 : i64
    %1325 = arith.index_cast %c56_1198 : index to i64
    %1326 = arith.addi %1325, %1324 : i64
    %1327 = arith.subi %1323, %c1_i64_1226 : i64
    %1328 = arith.muli %c1_i64_1211, %1327 : i64
    %1329 = arith.subi %1326, %1328 : i64
    %1330 = arith.subi %1329, %c1_i64_1226 : i64
    %1331 = arith.floordivsi %1330, %c1_i64_1213 : i64
    %1332 = arith.addi %1331, %c1_i64_1226 : i64
    %1333 = arith.index_cast %1332 : i64 to index
    // Output extent of dim 3, same formula.
    %1334 = arith.index_cast %c1_1208 : index to i64
    %c1_i64_1228 = arith.constant 1 : i64
    %c2_i64_1229 = arith.constant 2 : i64
    %1335 = arith.muli %1311, %c2_i64_1229 : i64
    %1336 = arith.index_cast %c56_1200 : index to i64
    %1337 = arith.addi %1336, %1335 : i64
    %1338 = arith.subi %1334, %c1_i64_1228 : i64
    %1339 = arith.muli %c1_i64_1212, %1338 : i64
    %1340 = arith.subi %1337, %1339 : i64
    %1341 = arith.subi %1340, %c1_i64_1228 : i64
    %1342 = arith.floordivsi %1341, %c1_i64_1214 : i64
    %1343 = arith.addi %1342, %c1_i64_1228 : i64
    %1344 = arith.index_cast %1343 : i64 to index
    // Broadcast the 256-element vector %cast_1183 along dim 1 (#map3) into the
    // output buffer. The conv accumulates into this tensor, so the vector acts
    // as a per-output-channel additive term.
    %1345 = tensor.empty(%1333, %1344) : tensor<1x256x?x?xf32>
    %1346 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1183 : tensor<256xf32>) outs(%1345 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // Channels-per-group values; neither result is consumed in this section.
    %1347 = arith.floordivsi %c64_1196, %1314 : index
    %1348 = arith.floordivsi %c256_1202, %1314 : index
    %c0_1230 = arith.constant 0 : index
    %c1_1231 = arith.constant 1 : index
    // The convolution itself: NCHW input, FCHW weights (256x64x1x1).
    %1349 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1225, %cast_1172 : tensor<?x?x?x?xf32>, tensor<256x64x1x1xf32>) outs(%1346 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Re-attach the static result shape.
    %cast_1232 = tensor.cast %1349 : tensor<1x256x?x?xf32> to tensor<1x256x56x56xf32>
    // Quantize the conv result %cast_1232 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0.
    %1350 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1351 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1233 = torch.constant.int 12
    // %1353 and %1355 are not read below; the region body converts
    // %1352 / %1354 again itself.
    %1352 = torch.aten.item %1350 : !torch.vtensor<[],f32> -> !torch.float
    %1353 = torch_c.to_f64 %1352
    %1354 = torch.aten.item %1351 : !torch.vtensor<[],si8> -> !torch.int
    %1355 = torch_c.to_i64 %1354
    %c1_1234 = arith.constant 1 : index
    %c1_1235 = arith.constant 1 : index
    %c256_1236 = arith.constant 256 : index
    %c2_1237 = arith.constant 2 : index
    %c56_1238 = arith.constant 56 : index
    %c3_1239 = arith.constant 3 : index
    %c56_1240 = arith.constant 56 : index
    %1356 = tensor.empty() : tensor<1x256x56x56xi8>
    %1357 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1232 : tensor<1x256x56x56xf32>) outs(%1356 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1354
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1352
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero, and with a power-of-two
      // scale exact .5 ties do occur.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1241 = tensor.cast %1357 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_1242 = tensor.cast %cast_1241 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // Dequantize %cast_1242 back to f32: x = (q - zero_point) * scale,
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0.
    %1358 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1359 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1361 and %1363 are not read below; the region body converts
    // %1360 / %1362 again itself.
    %1360 = torch.aten.item %1358 : !torch.vtensor<[],f32> -> !torch.float
    %1361 = torch_c.to_f64 %1360
    %1362 = torch.aten.item %1359 : !torch.vtensor<[],si8> -> !torch.int
    %1363 = torch_c.to_i64 %1362
    %cast_1243 = tensor.cast %cast_1242 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %c1_1244 = arith.constant 1 : index
    %c1_1245 = arith.constant 1 : index
    %c256_1246 = arith.constant 256 : index
    %c2_1247 = arith.constant 2 : index
    %c56_1248 = arith.constant 56 : index
    %c3_1249 = arith.constant 3 : index
    %c56_1250 = arith.constant 56 : index
    %1364 = tensor.empty() : tensor<1x256x56x56xf32>
    %1365 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1243 : tensor<1x256x56x56xi8>) outs(%1364 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1362
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1360
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1251 = tensor.cast %1365 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Elementwise add of two 1x256x56x56 tensors:
    //   result = %cast_1251 + %cast_945 * 1.0
    // %cast_945 is defined earlier in the function, outside this section.
    // %1366 is the i64 constant 1 that multiplies the second operand.
    %int1_1252 = torch.constant.int 1
    %1366 = torch_c.to_i64 %int1_1252
    %c1_1253 = arith.constant 1 : index
    %c1_1254 = arith.constant 1 : index
    %c256_1255 = arith.constant 256 : index
    %c2_1256 = arith.constant 2 : index
    %c56_1257 = arith.constant 56 : index
    %c3_1258 = arith.constant 3 : index
    %c56_1259 = arith.constant 56 : index
    %c1_1260 = arith.constant 1 : index
    %c256_1261 = arith.constant 256 : index
    // Broadcast-compatibility checks for dims 1, 2 and 3. Each compares two
    // constants defined just above (256 vs 256, 56 vs 56), so they hold
    // trivially for these static shapes.
    %1367 = arith.cmpi eq, %c256_1255, %c256_1261 : index
    cf.assert %1367, "mismatched size for broadcast"
    %c2_1262 = arith.constant 2 : index
    %c56_1263 = arith.constant 56 : index
    %1368 = arith.cmpi eq, %c56_1257, %c56_1263 : index
    cf.assert %1368, "mismatched size for broadcast"
    %c3_1264 = arith.constant 3 : index
    %c56_1265 = arith.constant 56 : index
    %1369 = arith.cmpi eq, %c56_1259, %c56_1265 : index
    cf.assert %1369, "mismatched size for broadcast"
    %1370 = tensor.empty() : tensor<1x256x56x56xf32>
    %1371 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1251, %cast_945 : tensor<1x256x56x56xf32>, tensor<1x256x56x56xf32>) outs(%1370 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by the multiplier (1) before adding.
      %5774 = arith.sitofp %1366 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x256x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1266 = tensor.cast %1371 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Elementwise select(x > 0, x, 0) over the sum %cast_1266.
    // The index constants below are not read by any op in this section.
    %c1_1267 = arith.constant 1 : index
    %c1_1268 = arith.constant 1 : index
    %c256_1269 = arith.constant 256 : index
    %c2_1270 = arith.constant 2 : index
    %c56_1271 = arith.constant 56 : index
    %c3_1272 = arith.constant 3 : index
    %c56_1273 = arith.constant 56 : index
    %1372 = tensor.empty() : tensor<1x256x56x56xf32>
    %1373 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1266 : tensor<1x256x56x56xf32>) outs(%1372 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is an unordered predicate: it is also true for NaN, so a NaN
      // input is selected and passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1274 = tensor.cast %1373 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Quantize the activation %cast_1274 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0.
    %1374 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1375 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1275 = torch.constant.int 12
    // %1377 and %1379 are not read below; the region body converts
    // %1376 / %1378 again itself.
    %1376 = torch.aten.item %1374 : !torch.vtensor<[],f32> -> !torch.float
    %1377 = torch_c.to_f64 %1376
    %1378 = torch.aten.item %1375 : !torch.vtensor<[],si8> -> !torch.int
    %1379 = torch_c.to_i64 %1378
    %c1_1276 = arith.constant 1 : index
    %c1_1277 = arith.constant 1 : index
    %c256_1278 = arith.constant 256 : index
    %c2_1279 = arith.constant 2 : index
    %c56_1280 = arith.constant 56 : index
    %c3_1281 = arith.constant 3 : index
    %c56_1282 = arith.constant 56 : index
    %1380 = tensor.empty() : tensor<1x256x56x56xi8>
    %1381 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1274 : tensor<1x256x56x56xf32>) outs(%1380 : tensor<1x256x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1378
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1376
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero, and with a power-of-two
      // scale exact .5 ties do occur.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x56x56xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1283 = tensor.cast %1381 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %cast_1284 = tensor.cast %cast_1283 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    // Dequantize %cast_1284 back to f32: x = (q - zero_point) * scale,
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0.
    %1382 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1383 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1385 and %1387 are not read below; the region body converts
    // %1384 / %1386 again itself.
    %1384 = torch.aten.item %1382 : !torch.vtensor<[],f32> -> !torch.float
    %1385 = torch_c.to_f64 %1384
    %1386 = torch.aten.item %1383 : !torch.vtensor<[],si8> -> !torch.int
    %1387 = torch_c.to_i64 %1386
    %cast_1285 = tensor.cast %cast_1284 : tensor<1x256x56x56xi8> to tensor<1x256x56x56xi8>
    %c1_1286 = arith.constant 1 : index
    %c1_1287 = arith.constant 1 : index
    %c256_1288 = arith.constant 256 : index
    %c2_1289 = arith.constant 2 : index
    %c56_1290 = arith.constant 56 : index
    %c3_1291 = arith.constant 3 : index
    %c56_1292 = arith.constant 56 : index
    %1388 = tensor.empty() : tensor<1x256x56x56xf32>
    %1389 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1285 : tensor<1x256x56x56xi8>) outs(%1388 : tensor<1x256x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1386
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1384
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x56x56xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1293 = tensor.cast %1389 : tensor<1x256x56x56xf32> to tensor<1x256x56x56xf32>
    // Quantize the 128x256x1x1 f32 tensor %46 (defined earlier in the function)
    // to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0.
    %1390 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1391 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1294 = torch.constant.int 12
    // %1393 and %1395 are not read below; the region body converts
    // %1392 / %1394 again itself.
    %1392 = torch.aten.item %1390 : !torch.vtensor<[],f32> -> !torch.float
    %1393 = torch_c.to_f64 %1392
    %1394 = torch.aten.item %1391 : !torch.vtensor<[],si8> -> !torch.int
    %1395 = torch_c.to_i64 %1394
    %c1_1295 = arith.constant 1 : index
    %c0_1296 = arith.constant 0 : index
    %c128 = arith.constant 128 : index
    %c1_1297 = arith.constant 1 : index
    %c256_1298 = arith.constant 256 : index
    %1396 = tensor.empty() : tensor<128x256x1x1xi8>
    %1397 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%46 : tensor<128x256x1x1xf32>) outs(%1396 : tensor<128x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1394
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1392
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x256x1x1xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1299 = tensor.cast %1397 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
    %cast_1300 = tensor.cast %cast_1299 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
    // Dequantize the 128x256x1x1 i8 tensor %cast_1300 back to f32:
    //   x = (q - zero_point) * scale, scale = 3.90625e-03 (2^-8), zero_point = 0.
    %1398 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1399 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %1401 and %1403 are not read below; the region body converts
    // %1400 / %1402 again itself.
    %1400 = torch.aten.item %1398 : !torch.vtensor<[],f32> -> !torch.float
    %1401 = torch_c.to_f64 %1400
    %1402 = torch.aten.item %1399 : !torch.vtensor<[],si8> -> !torch.int
    %1403 = torch_c.to_i64 %1402
    %cast_1301 = tensor.cast %cast_1300 : tensor<128x256x1x1xi8> to tensor<128x256x1x1xi8>
    %c1_1302 = arith.constant 1 : index
    %c0_1303 = arith.constant 0 : index
    %c128_1304 = arith.constant 128 : index
    %c1_1305 = arith.constant 1 : index
    %c256_1306 = arith.constant 256 : index
    %1404 = tensor.empty() : tensor<128x256x1x1xf32>
    %1405 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1301 : tensor<128x256x1x1xi8>) outs(%1404 : tensor<128x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1402
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1400
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x256x1x1xf32>
    // Same-type cast; it only introduces a new SSA name for the result.
    %cast_1307 = tensor.cast %1405 : tensor<128x256x1x1xf32> to tensor<128x256x1x1xf32>
    // Quantize the 128-element f32 vector %48 (defined earlier in the function)
    // to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7) and zero_point = 0.
    %1406 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1407 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized type;
    // it is not consumed by any op in this section.
    %int12_1308 = torch.constant.int 12
    // %1409 and %1411 are not read below; the region body converts
    // %1408 / %1410 again itself.
    %1408 = torch.aten.item %1406 : !torch.vtensor<[],f32> -> !torch.float
    %1409 = torch_c.to_f64 %1408
    %1410 = torch.aten.item %1407 : !torch.vtensor<[],si8> -> !torch.int
    %1411 = torch_c.to_i64 %1410
    %c1_1309 = arith.constant 1 : index
    %c0_1310 = arith.constant 0 : index
    %c128_1311 = arith.constant 128 : index
    %1412 = tensor.empty() : tensor<128xi8>
    %1413 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%48 : tensor<128xf32>) outs(%1412 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %1410
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1408
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, the tie rule ONNX QuantizeLinear specifies;
      // math.round would break ties away from zero.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float -> int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Same-type casts; they only introduce new SSA names.
    %cast_1312 = tensor.cast %1413 : tensor<128xi8> to tensor<128xi8>
    %cast_1313 = tensor.cast %cast_1312 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 bias %cast_1313 back to f32 with scale 7.8125e-03
    // (= 1/128) and zero point 0.
    %1414 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1415 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1416 = torch.aten.item %1414 : !torch.vtensor<[],f32> -> !torch.float
    %1417 = torch_c.to_f64 %1416
    %1418 = torch.aten.item %1415 : !torch.vtensor<[],si8> -> !torch.int
    %1419 = torch_c.to_i64 %1418
    %cast_1314 = tensor.cast %cast_1313 : tensor<128xi8> to tensor<128xi8>
    %c1_1315 = arith.constant 1 : index
    %c0_1316 = arith.constant 0 : index
    %c128_1317 = arith.constant 128 : index
    %1420 = tensor.empty() : tensor<128xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    %1421 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1314 : tensor<128xi8>) outs(%1420 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1418
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1416
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    // Same-type cast; %cast_1318 is the bias broadcast into the convolution
    // accumulator below.
    %cast_1318 = tensor.cast %1421 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution of %cast_1293 (1x256x56x56, defined above this excerpt)
    // with the 128x256x1x1 filter %cast_1307 and bias %cast_1318.
    // Zero padding, stride 1, dilation 1 (see the attributes on the conv op).
    %int0_1319 = torch.constant.int 0
    %int0_1320 = torch.constant.int 0
    %int1_1321 = torch.constant.int 1
    %int1_1322 = torch.constant.int 1
    %int1_1323 = torch.constant.int 1
    %int1_1324 = torch.constant.int 1
    %int0_1325 = torch.constant.int 0
    // The list values and %false_1326 are not referenced in the visible IR
    // (presumably operands of the original torch convolution -- TODO confirm).
    %1422 = torch.prim.ListConstruct %int0_1319, %int0_1320 : (!torch.int, !torch.int) -> !torch.list<int>
    %1423 = torch.prim.ListConstruct %int1_1321, %int1_1322 : (!torch.int, !torch.int) -> !torch.list<int>
    %1424 = torch.prim.ListConstruct %int1_1323, %int1_1324 : (!torch.int, !torch.int) -> !torch.list<int>
    %1425 = torch.prim.ListConstruct %int0_1325, %int0_1325 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1326 = torch.constant.bool false
    %int1_1327 = torch.constant.int 1
    // %1426 is the group count (1); %1427/%1428 are the two spatial padding
    // amounts (both 0) used by tensor.pad and the output-size arithmetic.
    %1426 = torch_c.to_i64 %int1_1327
    %1427 = torch_c.to_i64 %int0_1319
    %1428 = torch_c.to_i64 %int0_1320
    %1429 = torch_c.to_i64 %int0_1325
    %1430 = torch_c.to_i64 %int0_1325
    // Constants whose names mirror dimension indices and extents of the input
    // (1x256x56x56) and of the filter (128x256x1x1).
    %c0_1328 = arith.constant 0 : index
    %c1_1329 = arith.constant 1 : index
    %c1_1330 = arith.constant 1 : index
    %c256_1331 = arith.constant 256 : index
    %c2_1332 = arith.constant 2 : index
    %c56_1333 = arith.constant 56 : index
    %c3_1334 = arith.constant 3 : index
    %c56_1335 = arith.constant 56 : index
    %c0_1336 = arith.constant 0 : index
    %c128_1337 = arith.constant 128 : index
    %c1_1338 = arith.constant 1 : index
    %c256_1339 = arith.constant 256 : index
    %c2_1340 = arith.constant 2 : index
    %c1_1341 = arith.constant 1 : index
    %c3_1342 = arith.constant 3 : index
    %c1_1343 = arith.constant 1 : index
    // Runtime checks: 256 (input channels) and 128 (filter count) must both be
    // divisible by the group count.
    %1431 = arith.index_cast %1426 : i64 to index
    %c0_1344 = arith.constant 0 : index
    %1432 = arith.remsi %c256_1331, %1431 : index
    %1433 = arith.cmpi eq, %c0_1344, %1432 : index
    cf.assert %1433, "invalid: groups must divide input channel size evenly."
    %c0_1345 = arith.constant 0 : index
    %1434 = arith.remsi %c128_1337, %1431 : index
    %1435 = arith.cmpi eq, %c0_1345, %1434 : index
    cf.assert %1435, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1346/_1347 multiply (kernel - 1) and %c1_i64_1348/_1349 are the
    // divisors in the output-size arithmetic below; they correspond to the
    // dilation and stride of 1 on the conv op.
    %c1_i64_1346 = arith.constant 1 : i64
    %c1_i64_1347 = arith.constant 1 : i64
    %c1_i64_1348 = arith.constant 1 : i64
    %c1_i64_1349 = arith.constant 1 : i64
    // Padding fill value.
    %cst_1350 = arith.constant 0.000000e+00 : f32
    %c0_1351 = arith.constant 0 : index
    %c1_1352 = arith.constant 1 : index
    %c1_1353 = arith.constant 1 : index
    %c256_1354 = arith.constant 256 : index
    %c2_1355 = arith.constant 2 : index
    %c56_1356 = arith.constant 56 : index
    %c3_1357 = arith.constant 3 : index
    %c56_1358 = arith.constant 56 : index
    %c0_i64_1359 = arith.constant 0 : i64
    // Pad amounts: 0 on the first two dims, %1427/%1428 on the last two dims.
    %1436 = arith.index_cast %c0_i64_1359 : i64 to index
    %1437 = arith.index_cast %c0_i64_1359 : i64 to index
    %1438 = arith.index_cast %1427 : i64 to index
    %1439 = arith.index_cast %1428 : i64 to index
    // The pad result is typed fully dynamic even though every amount is constant.
    %padded_1360 = tensor.pad %cast_1293 low[%1436, %1437, %1438, %1439] high[%1436, %1437, %1438, %1439] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1350 : f32
    } : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
    // First output spatial extent:
    //   floordiv(56 + 2*pad - 1*(kernel - 1) - 1, 1) + 1, with kernel = 1.
    %1440 = arith.index_cast %c1_1341 : index to i64
    %c1_i64_1361 = arith.constant 1 : i64
    %c2_i64_1362 = arith.constant 2 : i64
    %1441 = arith.muli %1427, %c2_i64_1362 : i64
    %1442 = arith.index_cast %c56_1333 : index to i64
    %1443 = arith.addi %1442, %1441 : i64
    %1444 = arith.subi %1440, %c1_i64_1361 : i64
    %1445 = arith.muli %c1_i64_1346, %1444 : i64
    %1446 = arith.subi %1443, %1445 : i64
    %1447 = arith.subi %1446, %c1_i64_1361 : i64
    %1448 = arith.floordivsi %1447, %c1_i64_1348 : i64
    %1449 = arith.addi %1448, %c1_i64_1361 : i64
    %1450 = arith.index_cast %1449 : i64 to index
    // Second output spatial extent, computed the same way.
    %1451 = arith.index_cast %c1_1343 : index to i64
    %c1_i64_1363 = arith.constant 1 : i64
    %c2_i64_1364 = arith.constant 2 : i64
    %1452 = arith.muli %1428, %c2_i64_1364 : i64
    %1453 = arith.index_cast %c56_1335 : index to i64
    %1454 = arith.addi %1453, %1452 : i64
    %1455 = arith.subi %1451, %c1_i64_1363 : i64
    %1456 = arith.muli %c1_i64_1347, %1455 : i64
    %1457 = arith.subi %1454, %1456 : i64
    %1458 = arith.subi %1457, %c1_i64_1363 : i64
    %1459 = arith.floordivsi %1458, %c1_i64_1349 : i64
    %1460 = arith.addi %1459, %c1_i64_1363 : i64
    %1461 = arith.index_cast %1460 : i64 to index
    // Initialise the accumulator by broadcasting the bias along dim 1 (#map3),
    // so the convolution below adds onto the bias.
    %1462 = tensor.empty(%1450, %1461) : tensor<1x128x?x?xf32>
    %1463 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1318 : tensor<128xf32>) outs(%1462 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // Per-group channel counts; not referenced in the visible IR.
    %1464 = arith.floordivsi %c256_1331, %1431 : index
    %1465 = arith.floordivsi %c128_1337, %1431 : index
    %c0_1365 = arith.constant 0 : index
    %c1_1366 = arith.constant 1 : index
    %1466 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1360, %cast_1307 : tensor<?x?x?x?xf32>, tensor<128x256x1x1xf32>) outs(%1463 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Cast the dynamically-shaped result to the static shape 1x128x56x56.
    %cast_1367 = tensor.cast %1466 : tensor<1x128x?x?xf32> to tensor<1x128x56x56xf32>
    // ReLU over the convolution result %cast_1367.
    // The index constants below are not referenced in the visible IR.
    %c1_1368 = arith.constant 1 : index
    %c1_1369 = arith.constant 1 : index
    %c128_1370 = arith.constant 128 : index
    %c2_1371 = arith.constant 2 : index
    %c56_1372 = arith.constant 56 : index
    %c3_1373 = arith.constant 3 : index
    %c56_1374 = arith.constant 56 : index
    %1467 = tensor.empty() : tensor<1x128x56x56xf32>
    // Element-wise: out = (in > 0) ? in : 0. The predicate is the unordered
    // `ugt`, which is also true when an operand is NaN, so a NaN input is
    // passed through unchanged.
    %1468 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1367 : tensor<1x128x56x56xf32>) outs(%1467 : tensor<1x128x56x56xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x56x56xf32>
    // Same-type cast; %cast_1375 feeds the activation quantize step.
    %cast_1375 = tensor.cast %1468 : tensor<1x128x56x56xf32> to tensor<1x128x56x56xf32>
    // Quantize the ReLU output %cast_1375 to i8 with scale 7.8125e-03 (= 1/128)
    // and zero point 0.
    %1469 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1470 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1376 = torch.constant.int 12
    %1471 = torch.aten.item %1469 : !torch.vtensor<[],f32> -> !torch.float
    %1472 = torch_c.to_f64 %1471
    %1473 = torch.aten.item %1470 : !torch.vtensor<[],si8> -> !torch.int
    %1474 = torch_c.to_i64 %1473
    %c1_1377 = arith.constant 1 : index
    %c1_1378 = arith.constant 1 : index
    %c128_1379 = arith.constant 128 : index
    %c2_1380 = arith.constant 2 : index
    %c56_1381 = arith.constant 56 : index
    %c3_1382 = arith.constant 3 : index
    %c56_1383 = arith.constant 56 : index
    %1475 = tensor.empty() : tensor<1x128x56x56xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    %1476 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1375 : tensor<1x128x56x56xf32>) outs(%1475 : tensor<1x128x56x56xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1473
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1471
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. With a
      // power-of-two scale, exact .5 ties are reachable, and math.round
      // (ties away from zero) would then differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x56x56xi8>
    // Same-type casts; %cast_1385 feeds the dequantize step that follows.
    %cast_1384 = tensor.cast %1476 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
    %cast_1385 = tensor.cast %cast_1384 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
    // Dequantize the i8 activation %cast_1385 back to f32 with scale
    // 7.8125e-03 (= 1/128) and zero point 0.
    %1477 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1478 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1479 = torch.aten.item %1477 : !torch.vtensor<[],f32> -> !torch.float
    %1480 = torch_c.to_f64 %1479
    %1481 = torch.aten.item %1478 : !torch.vtensor<[],si8> -> !torch.int
    %1482 = torch_c.to_i64 %1481
    %cast_1386 = tensor.cast %cast_1385 : tensor<1x128x56x56xi8> to tensor<1x128x56x56xi8>
    %c1_1387 = arith.constant 1 : index
    %c1_1388 = arith.constant 1 : index
    %c128_1389 = arith.constant 128 : index
    %c2_1390 = arith.constant 2 : index
    %c56_1391 = arith.constant 56 : index
    %c3_1392 = arith.constant 3 : index
    %c56_1393 = arith.constant 56 : index
    %1483 = tensor.empty() : tensor<1x128x56x56xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    %1484 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1386 : tensor<1x128x56x56xi8>) outs(%1483 : tensor<1x128x56x56xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1481
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1479
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x56x56xf32>
    // Same-type cast; %cast_1394 is the input of the 3x3 convolution below.
    %cast_1394 = tensor.cast %1484 : tensor<1x128x56x56xf32> to tensor<1x128x56x56xf32>
    // Quantize the f32 filter %50 (128x128x3x3, defined earlier in the
    // function) to i8 with scale 3.90625e-03 (= 1/256) and zero point 0.
    %1485 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1486 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1395 = torch.constant.int 12
    %1487 = torch.aten.item %1485 : !torch.vtensor<[],f32> -> !torch.float
    %1488 = torch_c.to_f64 %1487
    %1489 = torch.aten.item %1486 : !torch.vtensor<[],si8> -> !torch.int
    %1490 = torch_c.to_i64 %1489
    %c1_1396 = arith.constant 1 : index
    %c0_1397 = arith.constant 0 : index
    %c128_1398 = arith.constant 128 : index
    %c1_1399 = arith.constant 1 : index
    %c128_1400 = arith.constant 128 : index
    %c2_1401 = arith.constant 2 : index
    %c3_1402 = arith.constant 3 : index
    %c3_1403 = arith.constant 3 : index
    %c3_1404 = arith.constant 3 : index
    %1491 = tensor.empty() : tensor<128x128x3x3xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    %1492 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%50 : tensor<128x128x3x3xf32>) outs(%1491 : tensor<128x128x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1489
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1487
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x128x3x3xi8>
    // Same-type casts; %cast_1406 feeds the dequantize step that follows.
    %cast_1405 = tensor.cast %1492 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %cast_1406 = tensor.cast %cast_1405 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // Dequantize the i8 filter %cast_1406 back to f32 with scale 3.90625e-03
    // (= 1/256) and zero point 0.
    %1493 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1494 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1495 = torch.aten.item %1493 : !torch.vtensor<[],f32> -> !torch.float
    %1496 = torch_c.to_f64 %1495
    %1497 = torch.aten.item %1494 : !torch.vtensor<[],si8> -> !torch.int
    %1498 = torch_c.to_i64 %1497
    %cast_1407 = tensor.cast %cast_1406 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %c1_1408 = arith.constant 1 : index
    %c0_1409 = arith.constant 0 : index
    %c128_1410 = arith.constant 128 : index
    %c1_1411 = arith.constant 1 : index
    %c128_1412 = arith.constant 128 : index
    %c2_1413 = arith.constant 2 : index
    %c3_1414 = arith.constant 3 : index
    %c3_1415 = arith.constant 3 : index
    %c3_1416 = arith.constant 3 : index
    %1499 = tensor.empty() : tensor<128x128x3x3xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    %1500 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1407 : tensor<128x128x3x3xi8>) outs(%1499 : tensor<128x128x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1497
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1495
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x128x3x3xf32>
    // Same-type cast; %cast_1417 is the filter operand of the 3x3 convolution below.
    %cast_1417 = tensor.cast %1500 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
    // Quantize the f32 bias %52 (defined earlier in the function) to i8 with
    // scale 7.8125e-03 (= 1/128) and zero point 0.
    %1501 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1502 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1418 = torch.constant.int 12
    %1503 = torch.aten.item %1501 : !torch.vtensor<[],f32> -> !torch.float
    %1504 = torch_c.to_f64 %1503
    %1505 = torch.aten.item %1502 : !torch.vtensor<[],si8> -> !torch.int
    %1506 = torch_c.to_i64 %1505
    %c1_1419 = arith.constant 1 : index
    %c0_1420 = arith.constant 0 : index
    %c128_1421 = arith.constant 128 : index
    %1507 = tensor.empty() : tensor<128xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    %1508 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%52 : tensor<128xf32>) outs(%1507 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1505
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1503
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Same-type casts; %cast_1423 feeds the dequantize step that follows.
    %cast_1422 = tensor.cast %1508 : tensor<128xi8> to tensor<128xi8>
    %cast_1423 = tensor.cast %cast_1422 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 bias %cast_1423 back to f32 with scale 7.8125e-03
    // (= 1/128) and zero point 0.
    %1509 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1510 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1511 = torch.aten.item %1509 : !torch.vtensor<[],f32> -> !torch.float
    %1512 = torch_c.to_f64 %1511
    %1513 = torch.aten.item %1510 : !torch.vtensor<[],si8> -> !torch.int
    %1514 = torch_c.to_i64 %1513
    %cast_1424 = tensor.cast %cast_1423 : tensor<128xi8> to tensor<128xi8>
    %c1_1425 = arith.constant 1 : index
    %c0_1426 = arith.constant 0 : index
    %c128_1427 = arith.constant 128 : index
    %1515 = tensor.empty() : tensor<128xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    %1516 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1424 : tensor<128xi8>) outs(%1515 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1513
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1511
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    // Same-type cast; %cast_1428 is the bias broadcast into the convolution
    // accumulator below.
    %cast_1428 = tensor.cast %1516 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution of %cast_1394 (1x128x56x56) with the 128x128x3x3 filter
    // %cast_1417 and bias %cast_1428. Padding 1 on both spatial dims, stride 2,
    // dilation 1 (see the attributes on the conv op); result is 1x128x28x28.
    %int1_1429 = torch.constant.int 1
    %int1_1430 = torch.constant.int 1
    %int1_1431 = torch.constant.int 1
    %int1_1432 = torch.constant.int 1
    %int2_1433 = torch.constant.int 2
    %int2_1434 = torch.constant.int 2
    %int0_1435 = torch.constant.int 0
    // The list values and %false_1436 are not referenced in the visible IR
    // (presumably operands of the original torch convolution -- TODO confirm).
    %1517 = torch.prim.ListConstruct %int1_1429, %int1_1430 : (!torch.int, !torch.int) -> !torch.list<int>
    %1518 = torch.prim.ListConstruct %int1_1431, %int1_1432 : (!torch.int, !torch.int) -> !torch.list<int>
    %1519 = torch.prim.ListConstruct %int2_1433, %int2_1434 : (!torch.int, !torch.int) -> !torch.list<int>
    %1520 = torch.prim.ListConstruct %int0_1435, %int0_1435 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1436 = torch.constant.bool false
    %int1_1437 = torch.constant.int 1
    // %1521 is the group count (1); %1522/%1523 are the two spatial padding
    // amounts (both 1) used by tensor.pad and the output-size arithmetic.
    %1521 = torch_c.to_i64 %int1_1437
    %1522 = torch_c.to_i64 %int1_1429
    %1523 = torch_c.to_i64 %int1_1430
    %1524 = torch_c.to_i64 %int0_1435
    %1525 = torch_c.to_i64 %int0_1435
    // Constants whose names mirror dimension indices and extents of the input
    // (1x128x56x56) and of the filter (128x128x3x3).
    %c0_1438 = arith.constant 0 : index
    %c1_1439 = arith.constant 1 : index
    %c1_1440 = arith.constant 1 : index
    %c128_1441 = arith.constant 128 : index
    %c2_1442 = arith.constant 2 : index
    %c56_1443 = arith.constant 56 : index
    %c3_1444 = arith.constant 3 : index
    %c56_1445 = arith.constant 56 : index
    %c0_1446 = arith.constant 0 : index
    %c128_1447 = arith.constant 128 : index
    %c1_1448 = arith.constant 1 : index
    %c128_1449 = arith.constant 128 : index
    %c2_1450 = arith.constant 2 : index
    %c3_1451 = arith.constant 3 : index
    %c3_1452 = arith.constant 3 : index
    %c3_1453 = arith.constant 3 : index
    // Runtime checks: 128 (input channels) and 128 (filter count) must both be
    // divisible by the group count.
    %1526 = arith.index_cast %1521 : i64 to index
    %c0_1454 = arith.constant 0 : index
    %1527 = arith.remsi %c128_1441, %1526 : index
    %1528 = arith.cmpi eq, %c0_1454, %1527 : index
    cf.assert %1528, "invalid: groups must divide input channel size evenly."
    %c0_1455 = arith.constant 0 : index
    %1529 = arith.remsi %c128_1447, %1526 : index
    %1530 = arith.cmpi eq, %c0_1455, %1529 : index
    cf.assert %1530, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1456/_1457 multiply (kernel - 1) and %c2_i64_1458/_1459 are the
    // divisors in the output-size arithmetic below; they correspond to the
    // dilation (1) and stride (2) on the conv op.
    %c1_i64_1456 = arith.constant 1 : i64
    %c1_i64_1457 = arith.constant 1 : i64
    %c2_i64_1458 = arith.constant 2 : i64
    %c2_i64_1459 = arith.constant 2 : i64
    // Padding fill value.
    %cst_1460 = arith.constant 0.000000e+00 : f32
    %c0_1461 = arith.constant 0 : index
    %c1_1462 = arith.constant 1 : index
    %c1_1463 = arith.constant 1 : index
    %c128_1464 = arith.constant 128 : index
    %c2_1465 = arith.constant 2 : index
    %c56_1466 = arith.constant 56 : index
    %c3_1467 = arith.constant 3 : index
    %c56_1468 = arith.constant 56 : index
    %c0_i64_1469 = arith.constant 0 : i64
    // Pad amounts: 0 on the first two dims, %1522/%1523 on the last two dims.
    %1531 = arith.index_cast %c0_i64_1469 : i64 to index
    %1532 = arith.index_cast %c0_i64_1469 : i64 to index
    %1533 = arith.index_cast %1522 : i64 to index
    %1534 = arith.index_cast %1523 : i64 to index
    // The pad result is typed fully dynamic even though every amount is constant.
    %padded_1470 = tensor.pad %cast_1394 low[%1531, %1532, %1533, %1534] high[%1531, %1532, %1533, %1534] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1460 : f32
    } : tensor<1x128x56x56xf32> to tensor<?x?x?x?xf32>
    // First output spatial extent:
    //   floordiv(56 + 2*pad - 1*(kernel - 1) - 1, 2) + 1, with kernel = 3.
    %1535 = arith.index_cast %c3_1451 : index to i64
    %c1_i64_1471 = arith.constant 1 : i64
    %c2_i64_1472 = arith.constant 2 : i64
    %1536 = arith.muli %1522, %c2_i64_1472 : i64
    %1537 = arith.index_cast %c56_1443 : index to i64
    %1538 = arith.addi %1537, %1536 : i64
    %1539 = arith.subi %1535, %c1_i64_1471 : i64
    %1540 = arith.muli %c1_i64_1456, %1539 : i64
    %1541 = arith.subi %1538, %1540 : i64
    %1542 = arith.subi %1541, %c1_i64_1471 : i64
    %1543 = arith.floordivsi %1542, %c2_i64_1458 : i64
    %1544 = arith.addi %1543, %c1_i64_1471 : i64
    %1545 = arith.index_cast %1544 : i64 to index
    // Second output spatial extent, computed the same way.
    %1546 = arith.index_cast %c3_1453 : index to i64
    %c1_i64_1473 = arith.constant 1 : i64
    %c2_i64_1474 = arith.constant 2 : i64
    %1547 = arith.muli %1523, %c2_i64_1474 : i64
    %1548 = arith.index_cast %c56_1445 : index to i64
    %1549 = arith.addi %1548, %1547 : i64
    %1550 = arith.subi %1546, %c1_i64_1473 : i64
    %1551 = arith.muli %c1_i64_1457, %1550 : i64
    %1552 = arith.subi %1549, %1551 : i64
    %1553 = arith.subi %1552, %c1_i64_1473 : i64
    %1554 = arith.floordivsi %1553, %c2_i64_1459 : i64
    %1555 = arith.addi %1554, %c1_i64_1473 : i64
    %1556 = arith.index_cast %1555 : i64 to index
    // Initialise the accumulator by broadcasting the bias along dim 1 (#map3),
    // so the convolution below adds onto the bias.
    %1557 = tensor.empty(%1545, %1556) : tensor<1x128x?x?xf32>
    %1558 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1428 : tensor<128xf32>) outs(%1557 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // Per-group channel counts; not referenced in the visible IR.
    %1559 = arith.floordivsi %c128_1441, %1526 : index
    %1560 = arith.floordivsi %c128_1447, %1526 : index
    %c0_1475 = arith.constant 0 : index
    %c1_1476 = arith.constant 1 : index
    %1561 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_1470, %cast_1417 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%1558 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Cast the dynamically-shaped result to the static shape 1x128x28x28.
    %cast_1477 = tensor.cast %1561 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // ReLU over the convolution result %cast_1477.
    // The index constants below are not referenced in the visible IR.
    %c1_1478 = arith.constant 1 : index
    %c1_1479 = arith.constant 1 : index
    %c128_1480 = arith.constant 128 : index
    %c2_1481 = arith.constant 2 : index
    %c28 = arith.constant 28 : index
    %c3_1482 = arith.constant 3 : index
    %c28_1483 = arith.constant 28 : index
    %1562 = tensor.empty() : tensor<1x128x28x28xf32>
    // Element-wise: out = (in > 0) ? in : 0. The predicate is the unordered
    // `ugt`, which is also true when an operand is NaN, so a NaN input is
    // passed through unchanged.
    %1563 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1477 : tensor<1x128x28x28xf32>) outs(%1562 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    // Same-type cast; %cast_1484 feeds the activation quantize step.
    %cast_1484 = tensor.cast %1563 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the ReLU output %cast_1484 to i8 with scale 1.5625e-02 (= 1/64)
    // and zero point 0.
    %1564 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1565 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1485 = torch.constant.int 12
    %1566 = torch.aten.item %1564 : !torch.vtensor<[],f32> -> !torch.float
    %1567 = torch_c.to_f64 %1566
    %1568 = torch.aten.item %1565 : !torch.vtensor<[],si8> -> !torch.int
    %1569 = torch_c.to_i64 %1568
    %c1_1486 = arith.constant 1 : index
    %c1_1487 = arith.constant 1 : index
    %c128_1488 = arith.constant 128 : index
    %c2_1489 = arith.constant 2 : index
    %c28_1490 = arith.constant 28 : index
    %c3_1491 = arith.constant 3 : index
    %c28_1492 = arith.constant 28 : index
    %1570 = tensor.empty() : tensor<1x128x28x28xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    %1571 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1484 : tensor<1x128x28x28xf32>) outs(%1570 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1568
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1566
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. With a
      // power-of-two scale, exact .5 ties are reachable, and math.round
      // (ties away from zero) would then differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Same-type casts; %cast_1494 feeds the dequantize step that follows.
    %cast_1493 = tensor.cast %1571 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_1494 = tensor.cast %cast_1493 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 activation %cast_1494 back to f32 with scale
    // 1.5625e-02 (= 1/64) and zero point 0.
    %1572 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1573 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1574 = torch.aten.item %1572 : !torch.vtensor<[],f32> -> !torch.float
    %1575 = torch_c.to_f64 %1574
    %1576 = torch.aten.item %1573 : !torch.vtensor<[],si8> -> !torch.int
    %1577 = torch_c.to_i64 %1576
    %cast_1495 = tensor.cast %cast_1494 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %c1_1496 = arith.constant 1 : index
    %c1_1497 = arith.constant 1 : index
    %c128_1498 = arith.constant 128 : index
    %c2_1499 = arith.constant 2 : index
    %c28_1500 = arith.constant 28 : index
    %c3_1501 = arith.constant 3 : index
    %c28_1502 = arith.constant 28 : index
    %1578 = tensor.empty() : tensor<1x128x28x28xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    %1579 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1495 : tensor<1x128x28x28xi8>) outs(%1578 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1576
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1574
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    // Same-type cast; %cast_1503 is not consumed within this excerpt.
    %cast_1503 = tensor.cast %1579 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 filter %54 (512x128x1x1, defined earlier in the
    // function) to i8 with scale 3.90625e-03 (= 1/256) and zero point 0.
    %1580 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1581 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1504 = torch.constant.int 12
    %1582 = torch.aten.item %1580 : !torch.vtensor<[],f32> -> !torch.float
    %1583 = torch_c.to_f64 %1582
    %1584 = torch.aten.item %1581 : !torch.vtensor<[],si8> -> !torch.int
    %1585 = torch_c.to_i64 %1584
    %c1_1505 = arith.constant 1 : index
    %c0_1506 = arith.constant 0 : index
    %c512 = arith.constant 512 : index
    %c1_1507 = arith.constant 1 : index
    %c128_1508 = arith.constant 128 : index
    %1586 = tensor.empty() : tensor<512x128x1x1xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    // #map4 reads the input at (d0, d1, 0, 0); the two trailing dims are size 1.
    %1587 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%54 : tensor<512x128x1x1xf32>) outs(%1586 : tensor<512x128x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1584
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1582
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x128x1x1xi8>
    // Same-type casts; %cast_1510 feeds the dequantize step that follows.
    %cast_1509 = tensor.cast %1587 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %cast_1510 = tensor.cast %cast_1509 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    // Dequantize the i8 filter %cast_1510 back to f32 with scale 3.90625e-03
    // (= 1/256) and zero point 0.
    %1588 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1589 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1590 = torch.aten.item %1588 : !torch.vtensor<[],f32> -> !torch.float
    %1591 = torch_c.to_f64 %1590
    %1592 = torch.aten.item %1589 : !torch.vtensor<[],si8> -> !torch.int
    %1593 = torch_c.to_i64 %1592
    %cast_1511 = tensor.cast %cast_1510 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %c1_1512 = arith.constant 1 : index
    %c0_1513 = arith.constant 0 : index
    %c512_1514 = arith.constant 512 : index
    %c1_1515 = arith.constant 1 : index
    %c128_1516 = arith.constant 128 : index
    %1594 = tensor.empty() : tensor<512x128x1x1xf32>
    // Element-wise: out = f32(sext(in) - i32(zero_point)) * f32(scale).
    // #map4 reads the input at (d0, d1, 0, 0); the two trailing dims are size 1.
    %1595 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1511 : tensor<512x128x1x1xi8>) outs(%1594 : tensor<512x128x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1592
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1590
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x128x1x1xf32>
    // Same-type cast; %cast_1517 is not consumed within this excerpt.
    %cast_1517 = tensor.cast %1595 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
    // Quantize the f32 bias %56 (defined earlier in the function) to i8 with
    // scale 7.8125e-03 (= 1/128) and zero point 0.
    %1596 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1597 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible IR (presumably the dtype code of the
    // original torch quantize op -- TODO confirm).
    %int12_1518 = torch.constant.int 12
    %1598 = torch.aten.item %1596 : !torch.vtensor<[],f32> -> !torch.float
    %1599 = torch_c.to_f64 %1598
    %1600 = torch.aten.item %1597 : !torch.vtensor<[],si8> -> !torch.int
    %1601 = torch_c.to_i64 %1600
    %c1_1519 = arith.constant 1 : index
    %c0_1520 = arith.constant 0 : index
    %c512_1521 = arith.constant 512 : index
    %1602 = tensor.empty() : tensor<512xi8>
    // Element-wise: out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127)).
    %1603 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%56 : tensor<512xf32>) outs(%1602 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1600
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1598
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round half to even, as ONNX QuantizeLinear specifies. math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Same-type casts; %cast_1523 feeds the dequantize step that follows.
    %cast_1522 = tensor.cast %1603 : tensor<512xi8> to tensor<512xi8>
    %cast_1523 = tensor.cast %cast_1522 : tensor<512xi8> to tensor<512xi8>
    // Dequantize %cast_1523 (tensor<512xi8>) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 7.8125e-03 (from %1604) and zero_point = 0 (from %1605).
    %1604 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1605 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1607, %1609 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1606 = torch.aten.item %1604 : !torch.vtensor<[],f32> -> !torch.float
    %1607 = torch_c.to_f64 %1606
    %1608 = torch.aten.item %1605 : !torch.vtensor<[],si8> -> !torch.int
    %1609 = torch_c.to_i64 %1608
    %cast_1524 = tensor.cast %cast_1523 : tensor<512xi8> to tensor<512xi8>
    %c1_1525 = arith.constant 1 : index
    %c0_1526 = arith.constant 0 : index
    %c512_1527 = arith.constant 512 : index
    %1610 = tensor.empty() : tensor<512xf32>
    %1611 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1524 : tensor<512xi8>) outs(%1610 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1608
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1606
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1528 = tensor.cast %1611 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution (linalg.conv_2d_nchw_fchw, strides = 1, dilations = 1):
    //   input   %cast_1503 : 1x128x28x28 f32 (padded by tensor.pad below)
    //   weights %cast_1517 : 512x128x1x1 f32
    //   init    %cast_1528 : 512 f32, broadcast along dim 1 into the outs tensor
    // The result is cast to the static shape 1x512x28x28.
    //
    // Torch-level constants. Of these, only %int0_1529 / %int0_1530 (pad amounts for
    // dims 2 and 3) and %int1_1537 (groups, per the assert messages below) feed the
    // ops in this section; the ListConstruct results, %false_1536, %1619 and %1620
    // do not appear to be used here.
    %int0_1529 = torch.constant.int 0
    %int0_1530 = torch.constant.int 0
    %int1_1531 = torch.constant.int 1
    %int1_1532 = torch.constant.int 1
    %int1_1533 = torch.constant.int 1
    %int1_1534 = torch.constant.int 1
    %int0_1535 = torch.constant.int 0
    %1612 = torch.prim.ListConstruct %int0_1529, %int0_1530 : (!torch.int, !torch.int) -> !torch.list<int>
    %1613 = torch.prim.ListConstruct %int1_1531, %int1_1532 : (!torch.int, !torch.int) -> !torch.list<int>
    %1614 = torch.prim.ListConstruct %int1_1533, %int1_1534 : (!torch.int, !torch.int) -> !torch.list<int>
    %1615 = torch.prim.ListConstruct %int0_1535, %int0_1535 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1536 = torch.constant.bool false
    %int1_1537 = torch.constant.int 1
    %1616 = torch_c.to_i64 %int1_1537
    %1617 = torch_c.to_i64 %int0_1529
    %1618 = torch_c.to_i64 %int0_1530
    %1619 = torch_c.to_i64 %int0_1535
    %1620 = torch_c.to_i64 %int0_1535
    // Index constants; the ones referenced below are %c128_1541, %c512_1547,
    // %c28_1543, %c28_1545, %c1_1551 and %c1_1553.
    %c0_1538 = arith.constant 0 : index
    %c1_1539 = arith.constant 1 : index
    %c1_1540 = arith.constant 1 : index
    %c128_1541 = arith.constant 128 : index
    %c2_1542 = arith.constant 2 : index
    %c28_1543 = arith.constant 28 : index
    %c3_1544 = arith.constant 3 : index
    %c28_1545 = arith.constant 28 : index
    %c0_1546 = arith.constant 0 : index
    %c512_1547 = arith.constant 512 : index
    %c1_1548 = arith.constant 1 : index
    %c128_1549 = arith.constant 128 : index
    %c2_1550 = arith.constant 2 : index
    %c1_1551 = arith.constant 1 : index
    %c3_1552 = arith.constant 3 : index
    %c1_1553 = arith.constant 1 : index
    // Runtime checks that groups (%1621 = 1) divides 128 and 512 with no remainder.
    %1621 = arith.index_cast %1616 : i64 to index
    %c0_1554 = arith.constant 0 : index
    %1622 = arith.remsi %c128_1541, %1621 : index
    %1623 = arith.cmpi eq, %c0_1554, %1622 : index
    cf.assert %1623, "invalid: groups must divide input channel size evenly."
    %c0_1555 = arith.constant 0 : index
    %1624 = arith.remsi %c512_1547, %1621 : index
    %1625 = arith.cmpi eq, %c0_1555, %1624 : index
    cf.assert %1625, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1556 / %c1_i64_1557 multiply (kernel - 1) and %c1_i64_1558 / %c1_i64_1559
    // are the divisors in the output-extent computation further down; their values
    // match the dilations (1) and strides (1) attributes of the convolution op.
    %c1_i64_1556 = arith.constant 1 : i64
    %c1_i64_1557 = arith.constant 1 : i64
    %c1_i64_1558 = arith.constant 1 : i64
    %c1_i64_1559 = arith.constant 1 : i64
    %cst_1560 = arith.constant 0.000000e+00 : f32
    %c0_1561 = arith.constant 0 : index
    %c1_1562 = arith.constant 1 : index
    %c1_1563 = arith.constant 1 : index
    %c128_1564 = arith.constant 128 : index
    %c2_1565 = arith.constant 2 : index
    %c28_1566 = arith.constant 28 : index
    %c3_1567 = arith.constant 3 : index
    %c28_1568 = arith.constant 28 : index
    %c0_i64_1569 = arith.constant 0 : i64
    // Pad the input with %cst_1560 (0.0): no padding on dims 0-1, and %1628 / %1629
    // (both derived from the constant 0) on dims 2-3. The result type is fully dynamic.
    %1626 = arith.index_cast %c0_i64_1569 : i64 to index
    %1627 = arith.index_cast %c0_i64_1569 : i64 to index
    %1628 = arith.index_cast %1617 : i64 to index
    %1629 = arith.index_cast %1618 : i64 to index
    %padded_1570 = tensor.pad %cast_1503 low[%1626, %1627, %1628, %1629] high[%1626, %1627, %1628, %1629] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1560 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(in + 2*pad - m*(k - 1) - 1, s) + 1
    // with in = 28, pad = 0, k = 1, m = 1, s = 1, which evaluates to 28.
    %1630 = arith.index_cast %c1_1551 : index to i64
    %c1_i64_1571 = arith.constant 1 : i64
    %c2_i64_1572 = arith.constant 2 : i64
    %1631 = arith.muli %1617, %c2_i64_1572 : i64
    %1632 = arith.index_cast %c28_1543 : index to i64
    %1633 = arith.addi %1632, %1631 : i64
    %1634 = arith.subi %1630, %c1_i64_1571 : i64
    %1635 = arith.muli %c1_i64_1556, %1634 : i64
    %1636 = arith.subi %1633, %1635 : i64
    %1637 = arith.subi %1636, %c1_i64_1571 : i64
    %1638 = arith.floordivsi %1637, %c1_i64_1558 : i64
    %1639 = arith.addi %1638, %c1_i64_1571 : i64
    %1640 = arith.index_cast %1639 : i64 to index
    // Same computation for dim 3 (also evaluates to 28).
    %1641 = arith.index_cast %c1_1553 : index to i64
    %c1_i64_1573 = arith.constant 1 : i64
    %c2_i64_1574 = arith.constant 2 : i64
    %1642 = arith.muli %1618, %c2_i64_1574 : i64
    %1643 = arith.index_cast %c28_1545 : index to i64
    %1644 = arith.addi %1643, %1642 : i64
    %1645 = arith.subi %1641, %c1_i64_1573 : i64
    %1646 = arith.muli %c1_i64_1557, %1645 : i64
    %1647 = arith.subi %1644, %1646 : i64
    %1648 = arith.subi %1647, %c1_i64_1573 : i64
    %1649 = arith.floordivsi %1648, %c1_i64_1559 : i64
    %1650 = arith.addi %1649, %c1_i64_1573 : i64
    %1651 = arith.index_cast %1650 : i64 to index
    // Broadcast %cast_1528 along dim 1 (#map3 selects d1) into a 1x512x?x? tensor;
    // that tensor is then passed as the outs operand of the convolution.
    %1652 = tensor.empty(%1640, %1651) : tensor<1x512x?x?xf32>
    %1653 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1528 : tensor<512xf32>) outs(%1652 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // NOTE(review): %1654, %1655, %c0_1575 and %c1_1576 do not appear to be used by
    // the ops that follow — confirm before removing.
    %1654 = arith.floordivsi %c128_1541, %1621 : index
    %1655 = arith.floordivsi %c512_1547, %1621 : index
    %c0_1575 = arith.constant 0 : index
    %c1_1576 = arith.constant 1 : index
    %1656 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1570, %cast_1517 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%1653 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Cast the dynamically shaped result to the static shape 1x512x28x28.
    %cast_1577 = tensor.cast %1656 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
    // Quantize the convolution result %cast_1577 (1x512x28x28 f32) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (2^-7, from %1657) and zero_point = 0 (from %1658).
    %1657 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1658 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1578, %1660, %1662 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1578 = torch.constant.int 12
    %1659 = torch.aten.item %1657 : !torch.vtensor<[],f32> -> !torch.float
    %1660 = torch_c.to_f64 %1659
    %1661 = torch.aten.item %1658 : !torch.vtensor<[],si8> -> !torch.int
    %1662 = torch_c.to_i64 %1661
    %c1_1579 = arith.constant 1 : index
    %c1_1580 = arith.constant 1 : index
    %c512_1581 = arith.constant 512 : index
    %c2_1582 = arith.constant 2 : index
    %c28_1583 = arith.constant 28 : index
    %c3_1584 = arith.constant 3 : index
    %c28_1585 = arith.constant 28 : index
    %1663 = tensor.empty() : tensor<1x512x28x28xi8>
    // Input is read through #map (0, d1, d2, d3); output uses the identity map #map1.
    %1664 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1577 : tensor<1x512x28x28xf32>) outs(%1663 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1661
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1659
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Both casts have identical source and result types.
    %cast_1586 = tensor.cast %1664 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_1587 = tensor.cast %cast_1586 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize %cast_1587 (1x512x28x28 i8) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 7.8125e-03 (from %1665) and zero_point = 0 (from %1666).
    %1665 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1666 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1668, %1670 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1667 = torch.aten.item %1665 : !torch.vtensor<[],f32> -> !torch.float
    %1668 = torch_c.to_f64 %1667
    %1669 = torch.aten.item %1666 : !torch.vtensor<[],si8> -> !torch.int
    %1670 = torch_c.to_i64 %1669
    %cast_1588 = tensor.cast %cast_1587 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_1589 = arith.constant 1 : index
    %c1_1590 = arith.constant 1 : index
    %c512_1591 = arith.constant 512 : index
    %c2_1592 = arith.constant 2 : index
    %c28_1593 = arith.constant 28 : index
    %c3_1594 = arith.constant 3 : index
    %c28_1595 = arith.constant 28 : index
    %1671 = tensor.empty() : tensor<1x512x28x28xf32>
    %1672 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1588 : tensor<1x512x28x28xi8>) outs(%1671 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1669
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1667
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1596 = tensor.cast %1672 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %58 (tensor<512x256x1x1xf32>, defined earlier in the function) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8, from %1673) and zero_point = 0 (from %1674).
    %1673 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1674 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1597, %1676, %1678 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1597 = torch.constant.int 12
    %1675 = torch.aten.item %1673 : !torch.vtensor<[],f32> -> !torch.float
    %1676 = torch_c.to_f64 %1675
    %1677 = torch.aten.item %1674 : !torch.vtensor<[],si8> -> !torch.int
    %1678 = torch_c.to_i64 %1677
    %c1_1598 = arith.constant 1 : index
    %c0_1599 = arith.constant 0 : index
    %c512_1600 = arith.constant 512 : index
    %c1_1601 = arith.constant 1 : index
    %c256_1602 = arith.constant 256 : index
    %1679 = tensor.empty() : tensor<512x256x1x1xi8>
    // Input is read through #map4 (d0, d1, 0, 0); output uses the identity map #map1.
    %1680 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%58 : tensor<512x256x1x1xf32>) outs(%1679 : tensor<512x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1677
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1675
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x256x1x1xi8>
    // Both casts have identical source and result types.
    %cast_1603 = tensor.cast %1680 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
    %cast_1604 = tensor.cast %cast_1603 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
    // Dequantize %cast_1604 (512x256x1x1 i8) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 3.90625e-03 (from %1681) and zero_point = 0 (from %1682).
    %1681 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1682 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1684, %1686 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1683 = torch.aten.item %1681 : !torch.vtensor<[],f32> -> !torch.float
    %1684 = torch_c.to_f64 %1683
    %1685 = torch.aten.item %1682 : !torch.vtensor<[],si8> -> !torch.int
    %1686 = torch_c.to_i64 %1685
    %cast_1605 = tensor.cast %cast_1604 : tensor<512x256x1x1xi8> to tensor<512x256x1x1xi8>
    %c1_1606 = arith.constant 1 : index
    %c0_1607 = arith.constant 0 : index
    %c512_1608 = arith.constant 512 : index
    %c1_1609 = arith.constant 1 : index
    %c256_1610 = arith.constant 256 : index
    %1687 = tensor.empty() : tensor<512x256x1x1xf32>
    %1688 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1605 : tensor<512x256x1x1xi8>) outs(%1687 : tensor<512x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1685
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1683
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x256x1x1xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1611 = tensor.cast %1688 : tensor<512x256x1x1xf32> to tensor<512x256x1x1xf32>
    // Quantize %60 (tensor<512xf32>, defined earlier in the function) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6, from %1689) and zero_point = 0 (from %1690).
    %1689 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1690 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1612, %1692, %1694 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1612 = torch.constant.int 12
    %1691 = torch.aten.item %1689 : !torch.vtensor<[],f32> -> !torch.float
    %1692 = torch_c.to_f64 %1691
    %1693 = torch.aten.item %1690 : !torch.vtensor<[],si8> -> !torch.int
    %1694 = torch_c.to_i64 %1693
    %c1_1613 = arith.constant 1 : index
    %c0_1614 = arith.constant 0 : index
    %c512_1615 = arith.constant 512 : index
    %1695 = tensor.empty() : tensor<512xi8>
    %1696 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%60 : tensor<512xf32>) outs(%1695 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1693
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1691
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Both casts have identical source and result types.
    %cast_1616 = tensor.cast %1696 : tensor<512xi8> to tensor<512xi8>
    %cast_1617 = tensor.cast %cast_1616 : tensor<512xi8> to tensor<512xi8>
    // Dequantize %cast_1617 (tensor<512xi8>) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (from %1697) and zero_point = 0 (from %1698).
    %1697 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1698 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1700, %1702 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1699 = torch.aten.item %1697 : !torch.vtensor<[],f32> -> !torch.float
    %1700 = torch_c.to_f64 %1699
    %1701 = torch.aten.item %1698 : !torch.vtensor<[],si8> -> !torch.int
    %1702 = torch_c.to_i64 %1701
    %cast_1618 = tensor.cast %cast_1617 : tensor<512xi8> to tensor<512xi8>
    %c1_1619 = arith.constant 1 : index
    %c0_1620 = arith.constant 0 : index
    %c512_1621 = arith.constant 512 : index
    %1703 = tensor.empty() : tensor<512xf32>
    %1704 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1618 : tensor<512xi8>) outs(%1703 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1701
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1699
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1622 = tensor.cast %1704 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution (linalg.conv_2d_nchw_fchw, strides = 2, dilations = 1):
    //   input   %cast_1293 : 1x256x56x56 f32 (padded by tensor.pad below)
    //   weights %cast_1611 : 512x256x1x1 f32
    //   init    %cast_1622 : 512 f32, broadcast along dim 1 into the outs tensor
    // The result is cast to the static shape 1x512x28x28.
    //
    // Torch-level constants. Of these, only %int0_1623 / %int0_1624 (pad amounts for
    // dims 2 and 3) and %int1_1631 (groups, per the assert messages below) feed the
    // ops in this section; the ListConstruct results, %false_1630, %1712 and %1713
    // do not appear to be used here.
    %int0_1623 = torch.constant.int 0
    %int0_1624 = torch.constant.int 0
    %int1_1625 = torch.constant.int 1
    %int1_1626 = torch.constant.int 1
    %int2_1627 = torch.constant.int 2
    %int2_1628 = torch.constant.int 2
    %int0_1629 = torch.constant.int 0
    %1705 = torch.prim.ListConstruct %int0_1623, %int0_1624 : (!torch.int, !torch.int) -> !torch.list<int>
    %1706 = torch.prim.ListConstruct %int1_1625, %int1_1626 : (!torch.int, !torch.int) -> !torch.list<int>
    %1707 = torch.prim.ListConstruct %int2_1627, %int2_1628 : (!torch.int, !torch.int) -> !torch.list<int>
    %1708 = torch.prim.ListConstruct %int0_1629, %int0_1629 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1630 = torch.constant.bool false
    %int1_1631 = torch.constant.int 1
    %1709 = torch_c.to_i64 %int1_1631
    %1710 = torch_c.to_i64 %int0_1623
    %1711 = torch_c.to_i64 %int0_1624
    %1712 = torch_c.to_i64 %int0_1629
    %1713 = torch_c.to_i64 %int0_1629
    // Index constants; the ones referenced below are %c256_1635, %c512_1641,
    // %c56_1637, %c56_1639, %c1_1645 and %c1_1647.
    %c0_1632 = arith.constant 0 : index
    %c1_1633 = arith.constant 1 : index
    %c1_1634 = arith.constant 1 : index
    %c256_1635 = arith.constant 256 : index
    %c2_1636 = arith.constant 2 : index
    %c56_1637 = arith.constant 56 : index
    %c3_1638 = arith.constant 3 : index
    %c56_1639 = arith.constant 56 : index
    %c0_1640 = arith.constant 0 : index
    %c512_1641 = arith.constant 512 : index
    %c1_1642 = arith.constant 1 : index
    %c256_1643 = arith.constant 256 : index
    %c2_1644 = arith.constant 2 : index
    %c1_1645 = arith.constant 1 : index
    %c3_1646 = arith.constant 3 : index
    %c1_1647 = arith.constant 1 : index
    // Runtime checks that groups (%1714 = 1) divides 256 and 512 with no remainder.
    %1714 = arith.index_cast %1709 : i64 to index
    %c0_1648 = arith.constant 0 : index
    %1715 = arith.remsi %c256_1635, %1714 : index
    %1716 = arith.cmpi eq, %c0_1648, %1715 : index
    cf.assert %1716, "invalid: groups must divide input channel size evenly."
    %c0_1649 = arith.constant 0 : index
    %1717 = arith.remsi %c512_1641, %1714 : index
    %1718 = arith.cmpi eq, %c0_1649, %1717 : index
    cf.assert %1718, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1650 / %c1_i64_1651 multiply (kernel - 1) and %c2_i64_1652 / %c2_i64_1653
    // are the divisors in the output-extent computation further down; their values
    // match the dilations (1) and strides (2) attributes of the convolution op.
    %c1_i64_1650 = arith.constant 1 : i64
    %c1_i64_1651 = arith.constant 1 : i64
    %c2_i64_1652 = arith.constant 2 : i64
    %c2_i64_1653 = arith.constant 2 : i64
    %cst_1654 = arith.constant 0.000000e+00 : f32
    %c0_1655 = arith.constant 0 : index
    %c1_1656 = arith.constant 1 : index
    %c1_1657 = arith.constant 1 : index
    %c256_1658 = arith.constant 256 : index
    %c2_1659 = arith.constant 2 : index
    %c56_1660 = arith.constant 56 : index
    %c3_1661 = arith.constant 3 : index
    %c56_1662 = arith.constant 56 : index
    %c0_i64_1663 = arith.constant 0 : i64
    // Pad the input with %cst_1654 (0.0): no padding on dims 0-1, and %1721 / %1722
    // (both derived from the constant 0) on dims 2-3. The result type is fully dynamic.
    %1719 = arith.index_cast %c0_i64_1663 : i64 to index
    %1720 = arith.index_cast %c0_i64_1663 : i64 to index
    %1721 = arith.index_cast %1710 : i64 to index
    %1722 = arith.index_cast %1711 : i64 to index
    %padded_1664 = tensor.pad %cast_1293 low[%1719, %1720, %1721, %1722] high[%1719, %1720, %1721, %1722] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1654 : f32
    } : tensor<1x256x56x56xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(in + 2*pad - m*(k - 1) - 1, s) + 1
    // with in = 56, pad = 0, k = 1, m = 1, s = 2, which evaluates to 28.
    %1723 = arith.index_cast %c1_1645 : index to i64
    %c1_i64_1665 = arith.constant 1 : i64
    %c2_i64_1666 = arith.constant 2 : i64
    %1724 = arith.muli %1710, %c2_i64_1666 : i64
    %1725 = arith.index_cast %c56_1637 : index to i64
    %1726 = arith.addi %1725, %1724 : i64
    %1727 = arith.subi %1723, %c1_i64_1665 : i64
    %1728 = arith.muli %c1_i64_1650, %1727 : i64
    %1729 = arith.subi %1726, %1728 : i64
    %1730 = arith.subi %1729, %c1_i64_1665 : i64
    %1731 = arith.floordivsi %1730, %c2_i64_1652 : i64
    %1732 = arith.addi %1731, %c1_i64_1665 : i64
    %1733 = arith.index_cast %1732 : i64 to index
    // Same computation for dim 3 (also evaluates to 28).
    %1734 = arith.index_cast %c1_1647 : index to i64
    %c1_i64_1667 = arith.constant 1 : i64
    %c2_i64_1668 = arith.constant 2 : i64
    %1735 = arith.muli %1711, %c2_i64_1668 : i64
    %1736 = arith.index_cast %c56_1639 : index to i64
    %1737 = arith.addi %1736, %1735 : i64
    %1738 = arith.subi %1734, %c1_i64_1667 : i64
    %1739 = arith.muli %c1_i64_1651, %1738 : i64
    %1740 = arith.subi %1737, %1739 : i64
    %1741 = arith.subi %1740, %c1_i64_1667 : i64
    %1742 = arith.floordivsi %1741, %c2_i64_1653 : i64
    %1743 = arith.addi %1742, %c1_i64_1667 : i64
    %1744 = arith.index_cast %1743 : i64 to index
    // Broadcast %cast_1622 along dim 1 (#map3 selects d1) into a 1x512x?x? tensor;
    // that tensor is then passed as the outs operand of the convolution.
    %1745 = tensor.empty(%1733, %1744) : tensor<1x512x?x?xf32>
    %1746 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1622 : tensor<512xf32>) outs(%1745 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // NOTE(review): %1747, %1748, %c0_1669 and %c1_1670 do not appear to be used by
    // the ops that follow — confirm before removing.
    %1747 = arith.floordivsi %c256_1635, %1714 : index
    %1748 = arith.floordivsi %c512_1641, %1714 : index
    %c0_1669 = arith.constant 0 : index
    %c1_1670 = arith.constant 1 : index
    %1749 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_1664, %cast_1611 : tensor<?x?x?x?xf32>, tensor<512x256x1x1xf32>) outs(%1746 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Cast the dynamically shaped result to the static shape 1x512x28x28.
    %cast_1671 = tensor.cast %1749 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
    // Quantize the convolution result %cast_1671 (1x512x28x28 f32) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6, from %1750) and zero_point = 0 (from %1751).
    %1750 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1751 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1672, %1753, %1755 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1672 = torch.constant.int 12
    %1752 = torch.aten.item %1750 : !torch.vtensor<[],f32> -> !torch.float
    %1753 = torch_c.to_f64 %1752
    %1754 = torch.aten.item %1751 : !torch.vtensor<[],si8> -> !torch.int
    %1755 = torch_c.to_i64 %1754
    %c1_1673 = arith.constant 1 : index
    %c1_1674 = arith.constant 1 : index
    %c512_1675 = arith.constant 512 : index
    %c2_1676 = arith.constant 2 : index
    %c28_1677 = arith.constant 28 : index
    %c3_1678 = arith.constant 3 : index
    %c28_1679 = arith.constant 28 : index
    %1756 = tensor.empty() : tensor<1x512x28x28xi8>
    // Input is read through #map (0, d1, d2, d3); output uses the identity map #map1.
    %1757 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1671 : tensor<1x512x28x28xf32>) outs(%1756 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1754
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1752
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Both casts have identical source and result types.
    %cast_1680 = tensor.cast %1757 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_1681 = tensor.cast %cast_1680 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize %cast_1681 (1x512x28x28 i8) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (from %1758) and zero_point = 0 (from %1759).
    %1758 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1759 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1761, %1763 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1760 = torch.aten.item %1758 : !torch.vtensor<[],f32> -> !torch.float
    %1761 = torch_c.to_f64 %1760
    %1762 = torch.aten.item %1759 : !torch.vtensor<[],si8> -> !torch.int
    %1763 = torch_c.to_i64 %1762
    %cast_1682 = tensor.cast %cast_1681 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_1683 = arith.constant 1 : index
    %c1_1684 = arith.constant 1 : index
    %c512_1685 = arith.constant 512 : index
    %c2_1686 = arith.constant 2 : index
    %c28_1687 = arith.constant 28 : index
    %c3_1688 = arith.constant 3 : index
    %c28_1689 = arith.constant 28 : index
    %1764 = tensor.empty() : tensor<1x512x28x28xf32>
    %1765 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1682 : tensor<1x512x28x28xi8>) outs(%1764 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1762
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1760
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1690 = tensor.cast %1765 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Element-wise add of two 1x512x28x28 f32 tensors:
    //   out = %cast_1596 + %cast_1690 * alpha
    // where alpha is the integer constant %int1_1691 (1) converted to f32.
    %int1_1691 = torch.constant.int 1
    %1766 = torch_c.to_i64 %int1_1691
    %c1_1692 = arith.constant 1 : index
    %c1_1693 = arith.constant 1 : index
    %c512_1694 = arith.constant 512 : index
    %c2_1695 = arith.constant 2 : index
    %c28_1696 = arith.constant 28 : index
    %c3_1697 = arith.constant 3 : index
    %c28_1698 = arith.constant 28 : index
    %c1_1699 = arith.constant 1 : index
    %c512_1700 = arith.constant 512 : index
    // Size checks for dims 1-3. Both sides of each comparison are constants
    // (512 == 512, 28 == 28, 28 == 28), so the asserts always hold as written.
    %1767 = arith.cmpi eq, %c512_1694, %c512_1700 : index
    cf.assert %1767, "mismatched size for broadcast"
    %c2_1701 = arith.constant 2 : index
    %c28_1702 = arith.constant 28 : index
    %1768 = arith.cmpi eq, %c28_1696, %c28_1702 : index
    cf.assert %1768, "mismatched size for broadcast"
    %c3_1703 = arith.constant 3 : index
    %c28_1704 = arith.constant 28 : index
    %1769 = arith.cmpi eq, %c28_1698, %c28_1704 : index
    cf.assert %1769, "mismatched size for broadcast"
    %1770 = tensor.empty() : tensor<1x512x28x28xf32>
    // Both inputs are read through #map (0, d1, d2, d3); output uses the identity map.
    %1771 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1596, %cast_1690 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%1770 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by alpha, then add it to the first.
      %5774 = arith.sitofp %1766 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x512x28x28xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1705 = tensor.cast %1771 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Element-wise rectification of %cast_1705 (1x512x28x28 f32):
    //   out = in if in > 0 else 0.0
    // NOTE(review): the index constants below do not appear to be used by the ops
    // that follow — confirm before removing.
    %c1_1706 = arith.constant 1 : index
    %c1_1707 = arith.constant 1 : index
    %c512_1708 = arith.constant 512 : index
    %c2_1709 = arith.constant 2 : index
    %c28_1710 = arith.constant 28 : index
    %c3_1711 = arith.constant 3 : index
    %c28_1712 = arith.constant 28 : index
    %1772 = tensor.empty() : tensor<1x512x28x28xf32>
    %1773 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1705 : tensor<1x512x28x28xf32>) outs(%1772 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is an unordered comparison, so it is also true for a NaN input; in that
      // case the select forwards %in (the NaN) unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x28x28xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1713 = tensor.cast %1773 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %cast_1713 (1x512x28x28 f32) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6, from %1774) and zero_point = 0 (from %1775).
    %1774 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1775 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1714, %1777, %1779 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1714 = torch.constant.int 12
    %1776 = torch.aten.item %1774 : !torch.vtensor<[],f32> -> !torch.float
    %1777 = torch_c.to_f64 %1776
    %1778 = torch.aten.item %1775 : !torch.vtensor<[],si8> -> !torch.int
    %1779 = torch_c.to_i64 %1778
    %c1_1715 = arith.constant 1 : index
    %c1_1716 = arith.constant 1 : index
    %c512_1717 = arith.constant 512 : index
    %c2_1718 = arith.constant 2 : index
    %c28_1719 = arith.constant 28 : index
    %c3_1720 = arith.constant 3 : index
    %c28_1721 = arith.constant 28 : index
    %1780 = tensor.empty() : tensor<1x512x28x28xi8>
    // Input is read through #map (0, d1, d2, d3); output uses the identity map #map1.
    %1781 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1713 : tensor<1x512x28x28xf32>) outs(%1780 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1778
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1776
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Both casts have identical source and result types.
    %cast_1722 = tensor.cast %1781 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_1723 = tensor.cast %cast_1722 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize %cast_1723 (1x512x28x28 i8) back to f32:
    //   out = f32(sext_i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (from %1782) and zero_point = 0 (from %1783).
    %1782 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1783 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %1785, %1787 and the index constants below do not appear to be
    // used by the ops that follow — confirm before removing.
    %1784 = torch.aten.item %1782 : !torch.vtensor<[],f32> -> !torch.float
    %1785 = torch_c.to_f64 %1784
    %1786 = torch.aten.item %1783 : !torch.vtensor<[],si8> -> !torch.int
    %1787 = torch_c.to_i64 %1786
    %cast_1724 = tensor.cast %cast_1723 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_1725 = arith.constant 1 : index
    %c1_1726 = arith.constant 1 : index
    %c512_1727 = arith.constant 512 : index
    %c2_1728 = arith.constant 2 : index
    %c28_1729 = arith.constant 28 : index
    %c3_1730 = arith.constant 3 : index
    %c28_1731 = arith.constant 28 : index
    %1788 = tensor.empty() : tensor<1x512x28x28xf32>
    %1789 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1724 : tensor<1x512x28x28xi8>) outs(%1788 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 and subtract the zero point (truncated i64 -> i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1786
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated f64 -> f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1784
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    // Source and result types are identical, so this cast does not change the type.
    %cast_1732 = tensor.cast %1789 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %62 (tensor<128x512x1x1xf32>, defined earlier in the function) to i8:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 0.001953125 (2^-9, from %1790) and zero_point = 0 (from %1791).
    %1790 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %1791 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1733, %1793, %1795 and the index constants below do not
    // appear to be used by the ops that follow — confirm before removing.
    %int12_1733 = torch.constant.int 12
    %1792 = torch.aten.item %1790 : !torch.vtensor<[],f32> -> !torch.float
    %1793 = torch_c.to_f64 %1792
    %1794 = torch.aten.item %1791 : !torch.vtensor<[],si8> -> !torch.int
    %1795 = torch_c.to_i64 %1794
    %c1_1734 = arith.constant 1 : index
    %c0_1735 = arith.constant 0 : index
    %c128_1736 = arith.constant 128 : index
    %c1_1737 = arith.constant 1 : index
    %c512_1738 = arith.constant 512 : index
    %1796 = tensor.empty() : tensor<128x512x1x1xi8>
    // Input is read through #map4 (d0, d1, 0, 0); output uses the identity map #map1.
    %1797 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%62 : tensor<128x512x1x1xf32>) outs(%1796 : tensor<128x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %1794
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32 (truncated from f64).
      %5776 = torch_c.to_f64 %1792
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point.
      // NOTE(review): math.round rounds ties away from zero (math.roundeven rounds
      // ties to even) — confirm which mode the source model's quantizer expects.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; both comparisons test the unclamped value %5780.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x512x1x1xi8>
    // Both casts have identical source and result types.
    %cast_1739 = tensor.cast %1797 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    %cast_1740 = tensor.cast %cast_1739 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    // Dequantize the i8 tensor %cast_1740 (128x512x1x1) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (0.001953125) and zero_point (0) come from the two scalar literals below.
    // The result (%cast_1747) is the filter operand of the convolution %1866.
    %1798 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %1799 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1800 = torch.aten.item %1798 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1801, %1803 and the index constants below are not referenced in the
    // visible part of this function.
    %1801 = torch_c.to_f64 %1800
    %1802 = torch.aten.item %1799 : !torch.vtensor<[],si8> -> !torch.int
    %1803 = torch_c.to_i64 %1802
    %cast_1741 = tensor.cast %cast_1740 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    %c1_1742 = arith.constant 1 : index
    %c0_1743 = arith.constant 0 : index
    %c128_1744 = arith.constant 128 : index
    %c1_1745 = arith.constant 1 : index
    %c512_1746 = arith.constant 512 : index
    %1804 = tensor.empty() : tensor<128x512x1x1xf32>
    // #map4 reads the input with its two trailing dimensions pinned to index 0.
    %1805 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1741 : tensor<128x512x1x1xi8>) outs(%1804 : tensor<128x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1802
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1800
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x512x1x1xf32>
    %cast_1747 = tensor.cast %1805 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
    // Quantize the f32 vector %64 (128 elements) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (3.90625e-03) and zero_point (0) come from the two scalar literals below.
    %1806 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1807 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1748, %1809, %1811 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1748 = torch.constant.int 12
    %1808 = torch.aten.item %1806 : !torch.vtensor<[],f32> -> !torch.float
    %1809 = torch_c.to_f64 %1808
    %1810 = torch.aten.item %1807 : !torch.vtensor<[],si8> -> !torch.int
    %1811 = torch_c.to_i64 %1810
    %c1_1749 = arith.constant 1 : index
    %c0_1750 = arith.constant 0 : index
    %c128_1751 = arith.constant 128 : index
    %1812 = tensor.empty() : tensor<128xi8>
    %1813 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%64 : tensor<128xf32>) outs(%1812 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1810
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1808
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Same-type casts (source and result types are identical).
    %cast_1752 = tensor.cast %1813 : tensor<128xi8> to tensor<128xi8>
    %cast_1753 = tensor.cast %cast_1752 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 vector %cast_1753 (128 elements) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (3.90625e-03) and zero_point (0) come from the two scalar literals below.
    // The result (%cast_1758) is broadcast per channel into the accumulator of the convolution %1866.
    %1814 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1815 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1816 = torch.aten.item %1814 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1817, %1819 and the index constants below are not referenced in the
    // visible part of this function.
    %1817 = torch_c.to_f64 %1816
    %1818 = torch.aten.item %1815 : !torch.vtensor<[],si8> -> !torch.int
    %1819 = torch_c.to_i64 %1818
    %cast_1754 = tensor.cast %cast_1753 : tensor<128xi8> to tensor<128xi8>
    %c1_1755 = arith.constant 1 : index
    %c0_1756 = arith.constant 0 : index
    %c128_1757 = arith.constant 128 : index
    %1820 = tensor.empty() : tensor<128xf32>
    %1821 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1754 : tensor<128xi8>) outs(%1820 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1818
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1816
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    %cast_1758 = tensor.cast %1821 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_1732 (1x512x28x28) with the
    // 128x512x1x1 filter %cast_1747. The accumulator is pre-filled per output channel from
    // %cast_1758, and the dynamic-shaped result is cast to 1x128x28x28.
    %int0_1759 = torch.constant.int 0
    %int0_1760 = torch.constant.int 0
    %int1_1761 = torch.constant.int 1
    %int1_1762 = torch.constant.int 1
    %int1_1763 = torch.constant.int 1
    %int1_1764 = torch.constant.int 1
    %int0_1765 = torch.constant.int 0
    // NOTE(review): the four lists %1822-%1825 are not referenced in the visible part of this function.
    %1822 = torch.prim.ListConstruct %int0_1759, %int0_1760 : (!torch.int, !torch.int) -> !torch.list<int>
    %1823 = torch.prim.ListConstruct %int1_1761, %int1_1762 : (!torch.int, !torch.int) -> !torch.list<int>
    %1824 = torch.prim.ListConstruct %int1_1763, %int1_1764 : (!torch.int, !torch.int) -> !torch.list<int>
    %1825 = torch.prim.ListConstruct %int0_1765, %int0_1765 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1766 = torch.constant.bool false
    %int1_1767 = torch.constant.int 1
    // %1826 is the group count checked by the asserts below; %1827 / %1828 are the padding
    // amounts applied to dimensions 2 and 3 by tensor.pad. %1829 / %1830 are not referenced
    // in the visible part of this function.
    %1826 = torch_c.to_i64 %int1_1767
    %1827 = torch_c.to_i64 %int0_1759
    %1828 = torch_c.to_i64 %int0_1760
    %1829 = torch_c.to_i64 %int0_1765
    %1830 = torch_c.to_i64 %int0_1765
    // Index constants; only %c512_1771, %c28_1773, %c28_1775, %c128_1777, %c1_1781 and
    // %c1_1783 from this run are used below.
    %c0_1768 = arith.constant 0 : index
    %c1_1769 = arith.constant 1 : index
    %c1_1770 = arith.constant 1 : index
    %c512_1771 = arith.constant 512 : index
    %c2_1772 = arith.constant 2 : index
    %c28_1773 = arith.constant 28 : index
    %c3_1774 = arith.constant 3 : index
    %c28_1775 = arith.constant 28 : index
    %c0_1776 = arith.constant 0 : index
    %c128_1777 = arith.constant 128 : index
    %c1_1778 = arith.constant 1 : index
    %c512_1779 = arith.constant 512 : index
    %c2_1780 = arith.constant 2 : index
    %c1_1781 = arith.constant 1 : index
    %c3_1782 = arith.constant 3 : index
    %c1_1783 = arith.constant 1 : index
    // Runtime checks: 512 and 128 must both be divisible by the group count (%1831).
    %1831 = arith.index_cast %1826 : i64 to index
    %c0_1784 = arith.constant 0 : index
    %1832 = arith.remsi %c512_1771, %1831 : index
    %1833 = arith.cmpi eq, %c0_1784, %1832 : index
    cf.assert %1833, "invalid: groups must divide input channel size evenly."
    %c0_1785 = arith.constant 0 : index
    %1834 = arith.remsi %c128_1777, %1831 : index
    %1835 = arith.cmpi eq, %c0_1785, %1834 : index
    cf.assert %1835, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1786 / %c1_i64_1787 multiply (kernel - 1) and %c1_i64_1788 / %c1_i64_1789 are
    // the divisors in the output-size computation below.
    %c1_i64_1786 = arith.constant 1 : i64
    %c1_i64_1787 = arith.constant 1 : i64
    %c1_i64_1788 = arith.constant 1 : i64
    %c1_i64_1789 = arith.constant 1 : i64
    // Padding fill value.
    %cst_1790 = arith.constant 0.000000e+00 : f32
    // Index constants; not referenced in the visible part of this function.
    %c0_1791 = arith.constant 0 : index
    %c1_1792 = arith.constant 1 : index
    %c1_1793 = arith.constant 1 : index
    %c512_1794 = arith.constant 512 : index
    %c2_1795 = arith.constant 2 : index
    %c28_1796 = arith.constant 28 : index
    %c3_1797 = arith.constant 3 : index
    %c28_1798 = arith.constant 28 : index
    %c0_i64_1799 = arith.constant 0 : i64
    // Pad dimensions 0 and 1 by zero and dimensions 2 and 3 by %1827 / %1828 on both sides,
    // filling with 0.0; the padded tensor is typed with fully dynamic dimensions.
    %1836 = arith.index_cast %c0_i64_1799 : i64 to index
    %1837 = arith.index_cast %c0_i64_1799 : i64 to index
    %1838 = arith.index_cast %1827 : i64 to index
    %1839 = arith.index_cast %1828 : i64 to index
    %padded_1800 = tensor.pad %cast_1732 low[%1836, %1837, %1838, %1839] high[%1836, %1837, %1838, %1839] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1790 : f32
    } : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent along dimension 2:
    //   %1850 = floordiv(28 + 2 * %1827 - %c1_i64_1786 * (%c1_1781 - 1) - 1, %c1_i64_1788) + 1
    %1840 = arith.index_cast %c1_1781 : index to i64
    %c1_i64_1801 = arith.constant 1 : i64
    %c2_i64_1802 = arith.constant 2 : i64
    %1841 = arith.muli %1827, %c2_i64_1802 : i64
    %1842 = arith.index_cast %c28_1773 : index to i64
    %1843 = arith.addi %1842, %1841 : i64
    %1844 = arith.subi %1840, %c1_i64_1801 : i64
    %1845 = arith.muli %c1_i64_1786, %1844 : i64
    %1846 = arith.subi %1843, %1845 : i64
    %1847 = arith.subi %1846, %c1_i64_1801 : i64
    %1848 = arith.floordivsi %1847, %c1_i64_1788 : i64
    %1849 = arith.addi %1848, %c1_i64_1801 : i64
    %1850 = arith.index_cast %1849 : i64 to index
    // Output extent along dimension 3, same formula with %1828, %c1_i64_1787, %c1_1783 and %c1_i64_1789.
    %1851 = arith.index_cast %c1_1783 : index to i64
    %c1_i64_1803 = arith.constant 1 : i64
    %c2_i64_1804 = arith.constant 2 : i64
    %1852 = arith.muli %1828, %c2_i64_1804 : i64
    %1853 = arith.index_cast %c28_1775 : index to i64
    %1854 = arith.addi %1853, %1852 : i64
    %1855 = arith.subi %1851, %c1_i64_1803 : i64
    %1856 = arith.muli %c1_i64_1787, %1855 : i64
    %1857 = arith.subi %1854, %1856 : i64
    %1858 = arith.subi %1857, %c1_i64_1803 : i64
    %1859 = arith.floordivsi %1858, %c1_i64_1789 : i64
    %1860 = arith.addi %1859, %c1_i64_1803 : i64
    %1861 = arith.index_cast %1860 : i64 to index
    // Build the accumulator: #map3 selects element d1 of %cast_1758 for every output position,
    // i.e. each output channel starts from its own value of that 128-element vector.
    %1862 = tensor.empty(%1850, %1861) : tensor<1x128x?x?xf32>
    %1863 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1758 : tensor<128xf32>) outs(%1862 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // NOTE(review): %1864, %1865, %c0_1805 and %c1_1806 are not referenced in the visible part of this function.
    %1864 = arith.floordivsi %c512_1771, %1831 : index
    %1865 = arith.floordivsi %c128_1777, %1831 : index
    %c0_1805 = arith.constant 0 : index
    %c1_1806 = arith.constant 1 : index
    // Convolution with unit strides and dilations, accumulating onto %1863.
    %1866 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1800, %cast_1747 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%1863 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Recover the static result shape.
    %cast_1807 = tensor.cast %1866 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // Element-wise rectifier on %cast_1807 (1x128x28x28): out = (in > 0.0) ? in : 0.0.
    // Index constants; not referenced in the visible part of this function.
    %c1_1808 = arith.constant 1 : index
    %c1_1809 = arith.constant 1 : index
    %c128_1810 = arith.constant 128 : index
    %c2_1811 = arith.constant 2 : index
    %c28_1812 = arith.constant 28 : index
    %c3_1813 = arith.constant 3 : index
    %c28_1814 = arith.constant 28 : index
    %1867 = tensor.empty() : tensor<1x128x28x28xf32>
    %1868 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1807 : tensor<1x128x28x28xf32>) outs(%1867 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered greater-than predicate, so it is also true when %in is NaN;
      // a NaN input is therefore yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_1815 = tensor.cast %1868 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 tensor %cast_1815 (1x128x28x28) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (1.5625e-02) and zero_point (0) come from the two scalar literals below.
    %1869 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1870 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1816, %1872, %1874 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1816 = torch.constant.int 12
    %1871 = torch.aten.item %1869 : !torch.vtensor<[],f32> -> !torch.float
    %1872 = torch_c.to_f64 %1871
    %1873 = torch.aten.item %1870 : !torch.vtensor<[],si8> -> !torch.int
    %1874 = torch_c.to_i64 %1873
    %c1_1817 = arith.constant 1 : index
    %c1_1818 = arith.constant 1 : index
    %c128_1819 = arith.constant 128 : index
    %c2_1820 = arith.constant 2 : index
    %c28_1821 = arith.constant 28 : index
    %c3_1822 = arith.constant 3 : index
    %c28_1823 = arith.constant 28 : index
    %1875 = tensor.empty() : tensor<1x128x28x28xi8>
    // #map reads the input with its leading dimension pinned to index 0.
    %1876 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1815 : tensor<1x128x28x28xf32>) outs(%1875 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1873
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1871
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Same-type casts (source and result types are identical).
    %cast_1824 = tensor.cast %1876 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_1825 = tensor.cast %cast_1824 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 tensor %cast_1825 (1x128x28x28) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (1.5625e-02) and zero_point (0) come from the two scalar literals below.
    %1877 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1878 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1879 = torch.aten.item %1877 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1880, %1882 and the index constants below are not referenced in the
    // visible part of this function.
    %1880 = torch_c.to_f64 %1879
    %1881 = torch.aten.item %1878 : !torch.vtensor<[],si8> -> !torch.int
    %1882 = torch_c.to_i64 %1881
    %cast_1826 = tensor.cast %cast_1825 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %c1_1827 = arith.constant 1 : index
    %c1_1828 = arith.constant 1 : index
    %c128_1829 = arith.constant 128 : index
    %c2_1830 = arith.constant 2 : index
    %c28_1831 = arith.constant 28 : index
    %c3_1832 = arith.constant 3 : index
    %c28_1833 = arith.constant 28 : index
    %1883 = tensor.empty() : tensor<1x128x28x28xf32>
    // #map reads the input with its leading dimension pinned to index 0.
    %1884 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1826 : tensor<1x128x28x28xi8>) outs(%1883 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1881
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1879
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_1834 = tensor.cast %1884 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 tensor %66 (128x128x3x3) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (3.90625e-03) and zero_point (0) come from the two scalar literals below.
    %1885 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1886 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1835, %1888, %1890 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1835 = torch.constant.int 12
    %1887 = torch.aten.item %1885 : !torch.vtensor<[],f32> -> !torch.float
    %1888 = torch_c.to_f64 %1887
    %1889 = torch.aten.item %1886 : !torch.vtensor<[],si8> -> !torch.int
    %1890 = torch_c.to_i64 %1889
    %c1_1836 = arith.constant 1 : index
    %c0_1837 = arith.constant 0 : index
    %c128_1838 = arith.constant 128 : index
    %c1_1839 = arith.constant 1 : index
    %c128_1840 = arith.constant 128 : index
    %c2_1841 = arith.constant 2 : index
    %c3_1842 = arith.constant 3 : index
    %c3_1843 = arith.constant 3 : index
    %c3_1844 = arith.constant 3 : index
    %1891 = tensor.empty() : tensor<128x128x3x3xi8>
    %1892 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%66 : tensor<128x128x3x3xf32>) outs(%1891 : tensor<128x128x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1889
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1887
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x128x3x3xi8>
    // Same-type casts (source and result types are identical).
    %cast_1845 = tensor.cast %1892 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %cast_1846 = tensor.cast %cast_1845 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // Dequantize the i8 tensor %cast_1846 (128x128x3x3) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (3.90625e-03) and zero_point (0) come from the two scalar literals below.
    // The result (%cast_1857) is the filter operand of the convolution %1961.
    %1893 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1894 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1895 = torch.aten.item %1893 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1896, %1898 and the index constants below are not referenced in the
    // visible part of this function.
    %1896 = torch_c.to_f64 %1895
    %1897 = torch.aten.item %1894 : !torch.vtensor<[],si8> -> !torch.int
    %1898 = torch_c.to_i64 %1897
    %cast_1847 = tensor.cast %cast_1846 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %c1_1848 = arith.constant 1 : index
    %c0_1849 = arith.constant 0 : index
    %c128_1850 = arith.constant 128 : index
    %c1_1851 = arith.constant 1 : index
    %c128_1852 = arith.constant 128 : index
    %c2_1853 = arith.constant 2 : index
    %c3_1854 = arith.constant 3 : index
    %c3_1855 = arith.constant 3 : index
    %c3_1856 = arith.constant 3 : index
    %1899 = tensor.empty() : tensor<128x128x3x3xf32>
    %1900 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1847 : tensor<128x128x3x3xi8>) outs(%1899 : tensor<128x128x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1897
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1895
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x128x3x3xf32>
    %cast_1857 = tensor.cast %1900 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
    // Quantize the f32 vector %68 (128 elements) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (1.5625e-02) and zero_point (0) come from the two scalar literals below.
    %1901 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1902 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1858, %1904, %1906 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1858 = torch.constant.int 12
    %1903 = torch.aten.item %1901 : !torch.vtensor<[],f32> -> !torch.float
    %1904 = torch_c.to_f64 %1903
    %1905 = torch.aten.item %1902 : !torch.vtensor<[],si8> -> !torch.int
    %1906 = torch_c.to_i64 %1905
    %c1_1859 = arith.constant 1 : index
    %c0_1860 = arith.constant 0 : index
    %c128_1861 = arith.constant 128 : index
    %1907 = tensor.empty() : tensor<128xi8>
    %1908 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%68 : tensor<128xf32>) outs(%1907 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1905
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1903
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Same-type casts (source and result types are identical).
    %cast_1862 = tensor.cast %1908 : tensor<128xi8> to tensor<128xi8>
    %cast_1863 = tensor.cast %cast_1862 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 vector %cast_1863 (128 elements) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (1.5625e-02) and zero_point (0) come from the two scalar literals below.
    // The result (%cast_1868) is broadcast per channel into the accumulator of the convolution %1961.
    %1909 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1910 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1911 = torch.aten.item %1909 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1912, %1914 and the index constants below are not referenced in the
    // visible part of this function.
    %1912 = torch_c.to_f64 %1911
    %1913 = torch.aten.item %1910 : !torch.vtensor<[],si8> -> !torch.int
    %1914 = torch_c.to_i64 %1913
    %cast_1864 = tensor.cast %cast_1863 : tensor<128xi8> to tensor<128xi8>
    %c1_1865 = arith.constant 1 : index
    %c0_1866 = arith.constant 0 : index
    %c128_1867 = arith.constant 128 : index
    %1915 = tensor.empty() : tensor<128xf32>
    %1916 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1864 : tensor<128xi8>) outs(%1915 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1913
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1911
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    %cast_1868 = tensor.cast %1916 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_1834 (1x128x28x28) with the
    // 128x128x3x3 filter %cast_1857. The accumulator is pre-filled per output channel from
    // %cast_1868, and the dynamic-shaped result is cast to 1x128x28x28.
    %int1_1869 = torch.constant.int 1
    %int1_1870 = torch.constant.int 1
    %int1_1871 = torch.constant.int 1
    %int1_1872 = torch.constant.int 1
    %int1_1873 = torch.constant.int 1
    %int1_1874 = torch.constant.int 1
    %int0_1875 = torch.constant.int 0
    // NOTE(review): the four lists %1917-%1920 are not referenced in the visible part of this function.
    %1917 = torch.prim.ListConstruct %int1_1869, %int1_1870 : (!torch.int, !torch.int) -> !torch.list<int>
    %1918 = torch.prim.ListConstruct %int1_1871, %int1_1872 : (!torch.int, !torch.int) -> !torch.list<int>
    %1919 = torch.prim.ListConstruct %int1_1873, %int1_1874 : (!torch.int, !torch.int) -> !torch.list<int>
    %1920 = torch.prim.ListConstruct %int0_1875, %int0_1875 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1876 = torch.constant.bool false
    %int1_1877 = torch.constant.int 1
    // %1921 is the group count checked by the asserts below; %1922 / %1923 are the padding
    // amounts applied to dimensions 2 and 3 by tensor.pad. %1924 / %1925 are not referenced
    // in the visible part of this function.
    %1921 = torch_c.to_i64 %int1_1877
    %1922 = torch_c.to_i64 %int1_1869
    %1923 = torch_c.to_i64 %int1_1870
    %1924 = torch_c.to_i64 %int0_1875
    %1925 = torch_c.to_i64 %int0_1875
    // Index constants; only %c128_1881, %c28_1883, %c28_1885, %c128_1887, %c3_1891 and
    // %c3_1893 from this run are used below.
    %c0_1878 = arith.constant 0 : index
    %c1_1879 = arith.constant 1 : index
    %c1_1880 = arith.constant 1 : index
    %c128_1881 = arith.constant 128 : index
    %c2_1882 = arith.constant 2 : index
    %c28_1883 = arith.constant 28 : index
    %c3_1884 = arith.constant 3 : index
    %c28_1885 = arith.constant 28 : index
    %c0_1886 = arith.constant 0 : index
    %c128_1887 = arith.constant 128 : index
    %c1_1888 = arith.constant 1 : index
    %c128_1889 = arith.constant 128 : index
    %c2_1890 = arith.constant 2 : index
    %c3_1891 = arith.constant 3 : index
    %c3_1892 = arith.constant 3 : index
    %c3_1893 = arith.constant 3 : index
    // Runtime checks: both 128-sized channel extents must be divisible by the group count (%1926).
    %1926 = arith.index_cast %1921 : i64 to index
    %c0_1894 = arith.constant 0 : index
    %1927 = arith.remsi %c128_1881, %1926 : index
    %1928 = arith.cmpi eq, %c0_1894, %1927 : index
    cf.assert %1928, "invalid: groups must divide input channel size evenly."
    %c0_1895 = arith.constant 0 : index
    %1929 = arith.remsi %c128_1887, %1926 : index
    %1930 = arith.cmpi eq, %c0_1895, %1929 : index
    cf.assert %1930, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_1896 / %c1_i64_1897 multiply (kernel - 1) and %c1_i64_1898 / %c1_i64_1899 are
    // the divisors in the output-size computation below.
    %c1_i64_1896 = arith.constant 1 : i64
    %c1_i64_1897 = arith.constant 1 : i64
    %c1_i64_1898 = arith.constant 1 : i64
    %c1_i64_1899 = arith.constant 1 : i64
    // Padding fill value.
    %cst_1900 = arith.constant 0.000000e+00 : f32
    // Index constants; not referenced in the visible part of this function.
    %c0_1901 = arith.constant 0 : index
    %c1_1902 = arith.constant 1 : index
    %c1_1903 = arith.constant 1 : index
    %c128_1904 = arith.constant 128 : index
    %c2_1905 = arith.constant 2 : index
    %c28_1906 = arith.constant 28 : index
    %c3_1907 = arith.constant 3 : index
    %c28_1908 = arith.constant 28 : index
    %c0_i64_1909 = arith.constant 0 : i64
    // Pad dimensions 0 and 1 by zero and dimensions 2 and 3 by %1922 / %1923 (1 each) on both
    // sides, filling with 0.0; the padded tensor is typed with fully dynamic dimensions.
    %1931 = arith.index_cast %c0_i64_1909 : i64 to index
    %1932 = arith.index_cast %c0_i64_1909 : i64 to index
    %1933 = arith.index_cast %1922 : i64 to index
    %1934 = arith.index_cast %1923 : i64 to index
    %padded_1910 = tensor.pad %cast_1834 low[%1931, %1932, %1933, %1934] high[%1931, %1932, %1933, %1934] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_1900 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent along dimension 2:
    //   %1945 = floordiv(28 + 2 * %1922 - %c1_i64_1896 * (%c3_1891 - 1) - 1, %c1_i64_1898) + 1
    %1935 = arith.index_cast %c3_1891 : index to i64
    %c1_i64_1911 = arith.constant 1 : i64
    %c2_i64_1912 = arith.constant 2 : i64
    %1936 = arith.muli %1922, %c2_i64_1912 : i64
    %1937 = arith.index_cast %c28_1883 : index to i64
    %1938 = arith.addi %1937, %1936 : i64
    %1939 = arith.subi %1935, %c1_i64_1911 : i64
    %1940 = arith.muli %c1_i64_1896, %1939 : i64
    %1941 = arith.subi %1938, %1940 : i64
    %1942 = arith.subi %1941, %c1_i64_1911 : i64
    %1943 = arith.floordivsi %1942, %c1_i64_1898 : i64
    %1944 = arith.addi %1943, %c1_i64_1911 : i64
    %1945 = arith.index_cast %1944 : i64 to index
    // Output extent along dimension 3, same formula with %1923, %c1_i64_1897, %c3_1893 and %c1_i64_1899.
    %1946 = arith.index_cast %c3_1893 : index to i64
    %c1_i64_1913 = arith.constant 1 : i64
    %c2_i64_1914 = arith.constant 2 : i64
    %1947 = arith.muli %1923, %c2_i64_1914 : i64
    %1948 = arith.index_cast %c28_1885 : index to i64
    %1949 = arith.addi %1948, %1947 : i64
    %1950 = arith.subi %1946, %c1_i64_1913 : i64
    %1951 = arith.muli %c1_i64_1897, %1950 : i64
    %1952 = arith.subi %1949, %1951 : i64
    %1953 = arith.subi %1952, %c1_i64_1913 : i64
    %1954 = arith.floordivsi %1953, %c1_i64_1899 : i64
    %1955 = arith.addi %1954, %c1_i64_1913 : i64
    %1956 = arith.index_cast %1955 : i64 to index
    // Build the accumulator: #map3 selects element d1 of %cast_1868 for every output position,
    // i.e. each output channel starts from its own value of that 128-element vector.
    %1957 = tensor.empty(%1945, %1956) : tensor<1x128x?x?xf32>
    %1958 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1868 : tensor<128xf32>) outs(%1957 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // NOTE(review): %1959, %1960, %c0_1915 and %c1_1916 are not referenced in the visible part of this function.
    %1959 = arith.floordivsi %c128_1881, %1926 : index
    %1960 = arith.floordivsi %c128_1887, %1926 : index
    %c0_1915 = arith.constant 0 : index
    %c1_1916 = arith.constant 1 : index
    // Convolution with unit strides and dilations, accumulating onto %1958.
    %1961 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_1910, %cast_1857 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%1958 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Recover the static result shape.
    %cast_1917 = tensor.cast %1961 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // Element-wise rectifier on %cast_1917 (1x128x28x28): out = (in > 0.0) ? in : 0.0.
    // Index constants; not referenced in the visible part of this function.
    %c1_1918 = arith.constant 1 : index
    %c1_1919 = arith.constant 1 : index
    %c128_1920 = arith.constant 128 : index
    %c2_1921 = arith.constant 2 : index
    %c28_1922 = arith.constant 28 : index
    %c3_1923 = arith.constant 3 : index
    %c28_1924 = arith.constant 28 : index
    %1962 = tensor.empty() : tensor<1x128x28x28xf32>
    %1963 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1917 : tensor<1x128x28x28xf32>) outs(%1962 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered greater-than predicate, so it is also true when %in is NaN;
      // a NaN input is therefore yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_1925 = tensor.cast %1963 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 tensor %cast_1925 (1x128x28x28) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (3.125e-02) and zero_point (0) come from the two scalar literals below.
    %1964 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1965 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1926, %1967, %1969 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1926 = torch.constant.int 12
    %1966 = torch.aten.item %1964 : !torch.vtensor<[],f32> -> !torch.float
    %1967 = torch_c.to_f64 %1966
    %1968 = torch.aten.item %1965 : !torch.vtensor<[],si8> -> !torch.int
    %1969 = torch_c.to_i64 %1968
    %c1_1927 = arith.constant 1 : index
    %c1_1928 = arith.constant 1 : index
    %c128_1929 = arith.constant 128 : index
    %c2_1930 = arith.constant 2 : index
    %c28_1931 = arith.constant 28 : index
    %c3_1932 = arith.constant 3 : index
    %c28_1933 = arith.constant 28 : index
    %1970 = tensor.empty() : tensor<1x128x28x28xi8>
    // #map reads the input with its leading dimension pinned to index 0.
    %1971 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1925 : tensor<1x128x28x28xf32>) outs(%1970 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1968
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1966
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Same-type casts (source and result types are identical).
    %cast_1934 = tensor.cast %1971 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_1935 = tensor.cast %cast_1934 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 tensor %cast_1935 (1x128x28x28) to f32, element-wise:
    //   out = sitofp(sext_i32(in) - zero_point) * scale
    // scale (3.125e-02) and zero_point (0) come from the two scalar literals below.
    %1972 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1973 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1974 = torch.aten.item %1972 : !torch.vtensor<[],f32> -> !torch.float
    // NOTE(review): %1975, %1977 and the index constants below are not referenced in the
    // visible part of this function.
    %1975 = torch_c.to_f64 %1974
    %1976 = torch.aten.item %1973 : !torch.vtensor<[],si8> -> !torch.int
    %1977 = torch_c.to_i64 %1976
    %cast_1936 = tensor.cast %cast_1935 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %c1_1937 = arith.constant 1 : index
    %c1_1938 = arith.constant 1 : index
    %c128_1939 = arith.constant 128 : index
    %c2_1940 = arith.constant 2 : index
    %c28_1941 = arith.constant 28 : index
    %c3_1942 = arith.constant 3 : index
    %c28_1943 = arith.constant 28 : index
    %1978 = tensor.empty() : tensor<1x128x28x28xf32>
    // #map reads the input with its leading dimension pinned to index 0.
    %1979 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1936 : tensor<1x128x28x28xi8>) outs(%1978 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen the i8 sample to i32 before subtracting the zero point.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1976
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // The scale is materialized as f64 and truncated to f32 for the multiply.
      %5779 = torch_c.to_f64 %1974
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_1944 = tensor.cast %1979 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 tensor %70 (512x128x1x1) to i8, element-wise:
    //   q = fptosi(clamp(roundeven(in / scale) + zero_point, -128.0, 127.0))
    // scale (3.90625e-03) and zero_point (0) come from the two scalar literals below.
    %1980 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1981 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_1945, %1983, %1985 and the index constants below are not
    // referenced in the visible part of this function.
    %int12_1945 = torch.constant.int 12
    %1982 = torch.aten.item %1980 : !torch.vtensor<[],f32> -> !torch.float
    %1983 = torch_c.to_f64 %1982
    %1984 = torch.aten.item %1981 : !torch.vtensor<[],si8> -> !torch.int
    %1985 = torch_c.to_i64 %1984
    %c1_1946 = arith.constant 1 : index
    %c0_1947 = arith.constant 0 : index
    %c512_1948 = arith.constant 512 : index
    %c1_1949 = arith.constant 1 : index
    %c128_1950 = arith.constant 128 : index
    %1986 = tensor.empty() : tensor<512x128x1x1xi8>
    // #map4 reads the input with its two trailing dimensions pinned to index 0.
    %1987 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%70 : tensor<512x128x1x1xf32>) outs(%1986 : tensor<512x128x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %1984
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1982
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. The graph is imported from ONNX (torch.onnx_meta.* attributes on
      // the function) and QuantizeLinear specifies round-half-to-even; math.round would round
      // ties away from zero (e.g. 0.5 -> 1 instead of 0).
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x128x1x1xi8>
    // Same-type casts (source and result types are identical).
    %cast_1951 = tensor.cast %1987 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %cast_1952 = tensor.cast %cast_1951 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    // Dequantize %cast_1952 (i8, 512x128x1x1) back to f32 as (in - zero_point) * scale,
    // with scale = 3.90625e-03 (%1988) and zero_point = 0 (%1989).
    %1988 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1989 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %1990 = torch.aten.item %1988 : !torch.vtensor<[],f32> -> !torch.float
    %1991 = torch_c.to_f64 %1990
    %1992 = torch.aten.item %1989 : !torch.vtensor<[],si8> -> !torch.int
    %1993 = torch_c.to_i64 %1992
    %cast_1953 = tensor.cast %cast_1952 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %c1_1954 = arith.constant 1 : index
    %c0_1955 = arith.constant 0 : index
    %c512_1956 = arith.constant 512 : index
    %c1_1957 = arith.constant 1 : index
    %c128_1958 = arith.constant 128 : index
    %1994 = tensor.empty() : tensor<512x128x1x1xf32>
    %1995 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1953 : tensor<512x128x1x1xi8>) outs(%1994 : tensor<512x128x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %1992
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %1990
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x128x1x1xf32>
    // %cast_1959 is the filter operand of the convolution %2056 further down.
    %cast_1959 = tensor.cast %1995 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
    // Quantize the 1-D tensor %72 (f32, 512 elements) to i8 as
    // clamp(round(in / scale) + zero_point, -128, 127),
    // with scale = 7.8125e-03 (%1996) and zero_point = 0 (%1997).
    %1996 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %1997 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably a torch dtype code left over from the quantize op; it is not
    // referenced in the visible part of the file - TODO confirm.
    %int12_1960 = torch.constant.int 12
    %1998 = torch.aten.item %1996 : !torch.vtensor<[],f32> -> !torch.float
    %1999 = torch_c.to_f64 %1998
    %2000 = torch.aten.item %1997 : !torch.vtensor<[],si8> -> !torch.int
    %2001 = torch_c.to_i64 %2000
    %c1_1961 = arith.constant 1 : index
    %c0_1962 = arith.constant 0 : index
    %c512_1963 = arith.constant 512 : index
    %2002 = tensor.empty() : tensor<512xi8>
    %2003 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%72 : tensor<512xf32>) outs(%2002 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale as f32 (%5777).
      %5774 = torch_c.to_i64 %2000
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %1998
      %5777 = arith.truncf %5776 : f64 to f32
      // NOTE(review): math.round rounds halfway cases away from zero. If this op
      // originates from ONNX QuantizeLinear (round-half-to-even), math.roundeven
      // may be the intended rounding - confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares test the unclamped %5780 with
      // unordered predicates, so a NaN value selects the upper bound.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Two consecutive same-type casts; the tensor type is unchanged.
    %cast_1964 = tensor.cast %2003 : tensor<512xi8> to tensor<512xi8>
    %cast_1965 = tensor.cast %cast_1964 : tensor<512xi8> to tensor<512xi8>
    // Dequantize %cast_1965 (i8, 512 elements) back to f32 as (in - zero_point) * scale,
    // with scale = 7.8125e-03 (%2004) and zero_point = 0 (%2005).
    %2004 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2005 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2006 = torch.aten.item %2004 : !torch.vtensor<[],f32> -> !torch.float
    %2007 = torch_c.to_f64 %2006
    %2008 = torch.aten.item %2005 : !torch.vtensor<[],si8> -> !torch.int
    %2009 = torch_c.to_i64 %2008
    %cast_1966 = tensor.cast %cast_1965 : tensor<512xi8> to tensor<512xi8>
    %c1_1967 = arith.constant 1 : index
    %c0_1968 = arith.constant 0 : index
    %c512_1969 = arith.constant 512 : index
    %2010 = tensor.empty() : tensor<512xf32>
    %2011 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_1966 : tensor<512xi8>) outs(%2010 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2008
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2006
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // %cast_1970 is broadcast along the channel dim by %2053 further down.
    %cast_1970 = tensor.cast %2011 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_1944 (1x128x28x28) with
    // filter %cast_1959 (512x128x1x1); the output is pre-filled with %cast_1970
    // (512 elements) along the channel dim. Result is cast to 1x512x28x28.
    %int0_1971 = torch.constant.int 0
    %int0_1972 = torch.constant.int 0
    %int1_1973 = torch.constant.int 1
    %int1_1974 = torch.constant.int 1
    %int1_1975 = torch.constant.int 1
    %int1_1976 = torch.constant.int 1
    %int0_1977 = torch.constant.int 0
    // %2012 = [0, 0]: its elements feed the H/W pad amounts of tensor.pad below.
    // %2013 / %2014 = [1, 1] and %2015 = [0, 0] are presumably the remaining
    // convolution attribute lists (stride / dilation / output padding) - TODO confirm.
    %2012 = torch.prim.ListConstruct %int0_1971, %int0_1972 : (!torch.int, !torch.int) -> !torch.list<int>
    %2013 = torch.prim.ListConstruct %int1_1973, %int1_1974 : (!torch.int, !torch.int) -> !torch.list<int>
    %2014 = torch.prim.ListConstruct %int1_1975, %int1_1976 : (!torch.int, !torch.int) -> !torch.list<int>
    %2015 = torch.prim.ListConstruct %int0_1977, %int0_1977 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_1978 = torch.constant.bool false
    // %int1_1979 is the group count: it is the divisor in the two "groups must
    // divide ..." assertions below.
    %int1_1979 = torch.constant.int 1
    %2016 = torch_c.to_i64 %int1_1979
    %2017 = torch_c.to_i64 %int0_1971
    %2018 = torch_c.to_i64 %int0_1972
    %2019 = torch_c.to_i64 %int0_1977
    %2020 = torch_c.to_i64 %int0_1977
    %c0_1980 = arith.constant 0 : index
    %c1_1981 = arith.constant 1 : index
    %c1_1982 = arith.constant 1 : index
    %c128_1983 = arith.constant 128 : index
    %c2_1984 = arith.constant 2 : index
    %c28_1985 = arith.constant 28 : index
    %c3_1986 = arith.constant 3 : index
    %c28_1987 = arith.constant 28 : index
    %c0_1988 = arith.constant 0 : index
    %c512_1989 = arith.constant 512 : index
    %c1_1990 = arith.constant 1 : index
    %c128_1991 = arith.constant 128 : index
    %c2_1992 = arith.constant 2 : index
    %c1_1993 = arith.constant 1 : index
    %c3_1994 = arith.constant 3 : index
    %c1_1995 = arith.constant 1 : index
    // Runtime checks: 128 rem groups == 0 and 512 rem groups == 0.
    %2021 = arith.index_cast %2016 : i64 to index
    %c0_1996 = arith.constant 0 : index
    %2022 = arith.remsi %c128_1983, %2021 : index
    %2023 = arith.cmpi eq, %c0_1996, %2022 : index
    cf.assert %2023, "invalid: groups must divide input channel size evenly."
    %c0_1997 = arith.constant 0 : index
    %2024 = arith.remsi %c512_1989, %2021 : index
    %2025 = arith.cmpi eq, %c0_1997, %2024 : index
    cf.assert %2025, "invalid: groups must divide weight batch size evenly."
    %c1_i64_1998 = arith.constant 1 : i64
    %c1_i64_1999 = arith.constant 1 : i64
    %c1_i64_2000 = arith.constant 1 : i64
    %c1_i64_2001 = arith.constant 1 : i64
    %cst_2002 = arith.constant 0.000000e+00 : f32
    %c0_2003 = arith.constant 0 : index
    %c1_2004 = arith.constant 1 : index
    %c1_2005 = arith.constant 1 : index
    %c128_2006 = arith.constant 128 : index
    %c2_2007 = arith.constant 2 : index
    %c28_2008 = arith.constant 28 : index
    %c3_2009 = arith.constant 3 : index
    %c28_2010 = arith.constant 28 : index
    %c0_i64_2011 = arith.constant 0 : i64
    // Pad with 0.0: amounts are 0 on dims 0/1 and %2017 / %2018 (both 0 here) on
    // dims 2/3, applied to low and high alike. The result type is fully dynamic.
    %2026 = arith.index_cast %c0_i64_2011 : i64 to index
    %2027 = arith.index_cast %c0_i64_2011 : i64 to index
    %2028 = arith.index_cast %2017 : i64 to index
    %2029 = arith.index_cast %2018 : i64 to index
    %padded_2012 = tensor.pad %cast_1944 low[%2026, %2027, %2028, %2029] high[%2026, %2027, %2028, %2029] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2002 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((28 + 2 * %2017 - %c1_i64_1998 * (%c1_1993 - 1) - 1) floordiv %c1_i64_2000) + 1
    // %c1_i64_1998 / %c1_i64_2000 are presumably dilation / stride - TODO confirm.
    %2030 = arith.index_cast %c1_1993 : index to i64
    %c1_i64_2013 = arith.constant 1 : i64
    %c2_i64_2014 = arith.constant 2 : i64
    %2031 = arith.muli %2017, %c2_i64_2014 : i64
    %2032 = arith.index_cast %c28_1985 : index to i64
    %2033 = arith.addi %2032, %2031 : i64
    %2034 = arith.subi %2030, %c1_i64_2013 : i64
    %2035 = arith.muli %c1_i64_1998, %2034 : i64
    %2036 = arith.subi %2033, %2035 : i64
    %2037 = arith.subi %2036, %c1_i64_2013 : i64
    %2038 = arith.floordivsi %2037, %c1_i64_2000 : i64
    %2039 = arith.addi %2038, %c1_i64_2013 : i64
    %2040 = arith.index_cast %2039 : i64 to index
    // Same computation for dim 3, using %2018, %c1_1995, %c1_i64_1999, %c1_i64_2001.
    %2041 = arith.index_cast %c1_1995 : index to i64
    %c1_i64_2015 = arith.constant 1 : i64
    %c2_i64_2016 = arith.constant 2 : i64
    %2042 = arith.muli %2018, %c2_i64_2016 : i64
    %2043 = arith.index_cast %c28_1987 : index to i64
    %2044 = arith.addi %2043, %2042 : i64
    %2045 = arith.subi %2041, %c1_i64_2015 : i64
    %2046 = arith.muli %c1_i64_1999, %2045 : i64
    %2047 = arith.subi %2044, %2046 : i64
    %2048 = arith.subi %2047, %c1_i64_2015 : i64
    %2049 = arith.floordivsi %2048, %c1_i64_2001 : i64
    %2050 = arith.addi %2049, %c1_i64_2015 : i64
    %2051 = arith.index_cast %2050 : i64 to index
    %2052 = tensor.empty(%2040, %2051) : tensor<1x512x?x?xf32>
    // Broadcast %cast_1970 along dim 1 (#map3 -> d1) into the tensor that is
    // passed as the outs operand of the convolution.
    %2053 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_1970 : tensor<512xf32>) outs(%2052 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // %2054 / %2055 are not referenced in the visible part of the file.
    %2054 = arith.floordivsi %c128_1983, %2021 : index
    %2055 = arith.floordivsi %c512_1989, %2021 : index
    %c0_2017 = arith.constant 0 : index
    %c1_2018 = arith.constant 1 : index
    %2056 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2012, %cast_1959 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2053 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Re-establish the static 28x28 spatial shape.
    %cast_2019 = tensor.cast %2056 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
    // Quantize the convolution result %cast_2019 (f32, 1x512x28x28) to i8 as
    // clamp(round(in / scale) + zero_point, -128, 127),
    // with scale = 1.5625e-02 (%2057) and zero_point = 0 (%2058).
    %2057 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2058 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably a torch dtype code left over from the quantize op; it is not
    // referenced in the visible part of the file - TODO confirm.
    %int12_2020 = torch.constant.int 12
    %2059 = torch.aten.item %2057 : !torch.vtensor<[],f32> -> !torch.float
    %2060 = torch_c.to_f64 %2059
    %2061 = torch.aten.item %2058 : !torch.vtensor<[],si8> -> !torch.int
    %2062 = torch_c.to_i64 %2061
    %c1_2021 = arith.constant 1 : index
    %c1_2022 = arith.constant 1 : index
    %c512_2023 = arith.constant 512 : index
    %c2_2024 = arith.constant 2 : index
    %c28_2025 = arith.constant 28 : index
    %c3_2026 = arith.constant 3 : index
    %c28_2027 = arith.constant 28 : index
    %2063 = tensor.empty() : tensor<1x512x28x28xi8>
    %2064 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2019 : tensor<1x512x28x28xf32>) outs(%2063 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale as f32 (%5777).
      %5774 = torch_c.to_i64 %2061
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2059
      %5777 = arith.truncf %5776 : f64 to f32
      // NOTE(review): math.round rounds halfway cases away from zero. If this op
      // originates from ONNX QuantizeLinear (round-half-to-even), math.roundeven
      // may be the intended rounding - confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares test the unclamped %5780 with
      // unordered predicates, so a NaN value selects the upper bound.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Two consecutive same-type casts; the tensor type is unchanged.
    %cast_2028 = tensor.cast %2064 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2029 = tensor.cast %cast_2028 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize %cast_2029 (i8, 1x512x28x28) back to f32 as (in - zero_point) * scale,
    // with scale = 1.5625e-02 (%2065) and zero_point = 0 (%2066).
    %2065 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2066 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2067 = torch.aten.item %2065 : !torch.vtensor<[],f32> -> !torch.float
    %2068 = torch_c.to_f64 %2067
    %2069 = torch.aten.item %2066 : !torch.vtensor<[],si8> -> !torch.int
    %2070 = torch_c.to_i64 %2069
    %cast_2030 = tensor.cast %cast_2029 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_2031 = arith.constant 1 : index
    %c1_2032 = arith.constant 1 : index
    %c512_2033 = arith.constant 512 : index
    %c2_2034 = arith.constant 2 : index
    %c28_2035 = arith.constant 28 : index
    %c3_2036 = arith.constant 3 : index
    %c28_2037 = arith.constant 28 : index
    %2071 = tensor.empty() : tensor<1x512x28x28xf32>
    %2072 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2030 : tensor<1x512x28x28xi8>) outs(%2071 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2069
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2067
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2038 = tensor.cast %2072 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Elementwise add of %cast_2038 and %cast_1732 (both f32, 1x512x28x28):
    // out = in + in_6197 * alpha, where alpha is %int1_2039 (= 1) converted to f32.
    // %cast_1732 is defined earlier in the function, outside the visible lines.
    %int1_2039 = torch.constant.int 1
    %2073 = torch_c.to_i64 %int1_2039
    %c1_2040 = arith.constant 1 : index
    %c1_2041 = arith.constant 1 : index
    %c512_2042 = arith.constant 512 : index
    %c2_2043 = arith.constant 2 : index
    %c28_2044 = arith.constant 28 : index
    %c3_2045 = arith.constant 3 : index
    %c28_2046 = arith.constant 28 : index
    %c1_2047 = arith.constant 1 : index
    %c512_2048 = arith.constant 512 : index
    // The three assertions below compare pairs of equal constants
    // (512 == 512, 28 == 28, 28 == 28).
    %2074 = arith.cmpi eq, %c512_2042, %c512_2048 : index
    cf.assert %2074, "mismatched size for broadcast"
    %c2_2049 = arith.constant 2 : index
    %c28_2050 = arith.constant 28 : index
    %2075 = arith.cmpi eq, %c28_2044, %c28_2050 : index
    cf.assert %2075, "mismatched size for broadcast"
    %c3_2051 = arith.constant 3 : index
    %c28_2052 = arith.constant 28 : index
    %2076 = arith.cmpi eq, %c28_2046, %c28_2052 : index
    cf.assert %2076, "mismatched size for broadcast"
    %2077 = tensor.empty() : tensor<1x512x28x28xf32>
    %2078 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2038, %cast_1732 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2077 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // %2073 is captured from the enclosing function body.
      %5774 = arith.sitofp %2073 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2053 = tensor.cast %2078 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // ReLU over %cast_2053 (f32, 1x512x28x28): out = (in > 0) ? in : 0.
    %c1_2054 = arith.constant 1 : index
    %c1_2055 = arith.constant 1 : index
    %c512_2056 = arith.constant 512 : index
    %c2_2057 = arith.constant 2 : index
    %c28_2058 = arith.constant 28 : index
    %c3_2059 = arith.constant 3 : index
    %c28_2060 = arith.constant 28 : index
    %2079 = tensor.empty() : tensor<1x512x28x28xf32>
    %2080 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2053 : tensor<1x512x28x28xf32>) outs(%2079 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is an unordered compare, so a NaN input is passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2061 = tensor.cast %2080 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %cast_2061 (f32, 1x512x28x28) to i8 as
    // clamp(round(in / scale) + zero_point, -128, 127),
    // with scale = 1.5625e-02 (%2081) and zero_point = 0 (%2082).
    %2081 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2082 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably a torch dtype code left over from the quantize op; it is not
    // referenced in the visible part of the file - TODO confirm.
    %int12_2062 = torch.constant.int 12
    %2083 = torch.aten.item %2081 : !torch.vtensor<[],f32> -> !torch.float
    %2084 = torch_c.to_f64 %2083
    %2085 = torch.aten.item %2082 : !torch.vtensor<[],si8> -> !torch.int
    %2086 = torch_c.to_i64 %2085
    %c1_2063 = arith.constant 1 : index
    %c1_2064 = arith.constant 1 : index
    %c512_2065 = arith.constant 512 : index
    %c2_2066 = arith.constant 2 : index
    %c28_2067 = arith.constant 28 : index
    %c3_2068 = arith.constant 3 : index
    %c28_2069 = arith.constant 28 : index
    %2087 = tensor.empty() : tensor<1x512x28x28xi8>
    %2088 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2061 : tensor<1x512x28x28xf32>) outs(%2087 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale as f32 (%5777).
      %5774 = torch_c.to_i64 %2085
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2083
      %5777 = arith.truncf %5776 : f64 to f32
      // NOTE(review): math.round rounds halfway cases away from zero. If this op
      // originates from ONNX QuantizeLinear (round-half-to-even), math.roundeven
      // may be the intended rounding - confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares test the unclamped %5780 with
      // unordered predicates, so a NaN value selects the upper bound.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Two consecutive same-type casts; the tensor type is unchanged.
    %cast_2070 = tensor.cast %2088 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2071 = tensor.cast %cast_2070 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize %cast_2071 (i8, 1x512x28x28) back to f32 as (in - zero_point) * scale,
    // with scale = 1.5625e-02 (%2089) and zero_point = 0 (%2090).
    %2089 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2090 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2091 = torch.aten.item %2089 : !torch.vtensor<[],f32> -> !torch.float
    %2092 = torch_c.to_f64 %2091
    %2093 = torch.aten.item %2090 : !torch.vtensor<[],si8> -> !torch.int
    %2094 = torch_c.to_i64 %2093
    %cast_2072 = tensor.cast %cast_2071 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_2073 = arith.constant 1 : index
    %c1_2074 = arith.constant 1 : index
    %c512_2075 = arith.constant 512 : index
    %c2_2076 = arith.constant 2 : index
    %c28_2077 = arith.constant 28 : index
    %c3_2078 = arith.constant 3 : index
    %c28_2079 = arith.constant 28 : index
    %2095 = tensor.empty() : tensor<1x512x28x28xf32>
    %2096 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2072 : tensor<1x512x28x28xi8>) outs(%2095 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2093
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2091
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    // %cast_2080 is the input of the tensor.pad that feeds convolution %2173 below.
    %cast_2080 = tensor.cast %2096 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %74 (f32, 128x512x1x1) to i8 as
    // clamp(round(in / scale) + zero_point, -128, 127),
    // with scale = 0.001953125 (%2097) and zero_point = 0 (%2098).
    %2097 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %2098 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably a torch dtype code left over from the quantize op; it is not
    // referenced in the visible part of the file - TODO confirm.
    %int12_2081 = torch.constant.int 12
    %2099 = torch.aten.item %2097 : !torch.vtensor<[],f32> -> !torch.float
    %2100 = torch_c.to_f64 %2099
    %2101 = torch.aten.item %2098 : !torch.vtensor<[],si8> -> !torch.int
    %2102 = torch_c.to_i64 %2101
    %c1_2082 = arith.constant 1 : index
    %c0_2083 = arith.constant 0 : index
    %c128_2084 = arith.constant 128 : index
    %c1_2085 = arith.constant 1 : index
    %c512_2086 = arith.constant 512 : index
    %2103 = tensor.empty() : tensor<128x512x1x1xi8>
    // #map4 reads the input at index 0 along dims 2 and 3.
    %2104 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%74 : tensor<128x512x1x1xf32>) outs(%2103 : tensor<128x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale as f32 (%5777).
      %5774 = torch_c.to_i64 %2101
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2099
      %5777 = arith.truncf %5776 : f64 to f32
      // NOTE(review): math.round rounds halfway cases away from zero. If this op
      // originates from ONNX QuantizeLinear (round-half-to-even), math.roundeven
      // may be the intended rounding - confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares test the unclamped %5780 with
      // unordered predicates, so a NaN value selects the upper bound.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x512x1x1xi8>
    // Two consecutive same-type casts; the tensor type is unchanged.
    %cast_2087 = tensor.cast %2104 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    %cast_2088 = tensor.cast %cast_2087 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    // Dequantize %cast_2088 (i8, 128x512x1x1) back to f32 as (in - zero_point) * scale,
    // with scale = 0.001953125 (%2105) and zero_point = 0 (%2106).
    %2105 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %2106 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2107 = torch.aten.item %2105 : !torch.vtensor<[],f32> -> !torch.float
    %2108 = torch_c.to_f64 %2107
    %2109 = torch.aten.item %2106 : !torch.vtensor<[],si8> -> !torch.int
    %2110 = torch_c.to_i64 %2109
    %cast_2089 = tensor.cast %cast_2088 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    %c1_2090 = arith.constant 1 : index
    %c0_2091 = arith.constant 0 : index
    %c128_2092 = arith.constant 128 : index
    %c1_2093 = arith.constant 1 : index
    %c512_2094 = arith.constant 512 : index
    %2111 = tensor.empty() : tensor<128x512x1x1xf32>
    %2112 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2089 : tensor<128x512x1x1xi8>) outs(%2111 : tensor<128x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2109
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2107
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x512x1x1xf32>
    // %cast_2095 is the filter operand of the convolution %2173 further down.
    %cast_2095 = tensor.cast %2112 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
    // Quantize the 1-D tensor %76 (f32, 128 elements) to i8 as
    // clamp(round(in / scale) + zero_point, -128, 127),
    // with scale = 7.8125e-03 (%2113) and zero_point = 0 (%2114).
    %2113 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2114 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably a torch dtype code left over from the quantize op; it is not
    // referenced in the visible part of the file - TODO confirm.
    %int12_2096 = torch.constant.int 12
    %2115 = torch.aten.item %2113 : !torch.vtensor<[],f32> -> !torch.float
    %2116 = torch_c.to_f64 %2115
    %2117 = torch.aten.item %2114 : !torch.vtensor<[],si8> -> !torch.int
    %2118 = torch_c.to_i64 %2117
    %c1_2097 = arith.constant 1 : index
    %c0_2098 = arith.constant 0 : index
    %c128_2099 = arith.constant 128 : index
    %2119 = tensor.empty() : tensor<128xi8>
    %2120 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%76 : tensor<128xf32>) outs(%2119 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 (%5775) and scale as f32 (%5777).
      %5774 = torch_c.to_i64 %2117
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2115
      %5777 = arith.truncf %5776 : f64 to f32
      // NOTE(review): math.round rounds halfway cases away from zero. If this op
      // originates from ONNX QuantizeLinear (round-half-to-even), math.roundeven
      // may be the intended rounding - confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. Both compares test the unclamped %5780 with
      // unordered predicates, so a NaN value selects the upper bound.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Two consecutive same-type casts; the tensor type is unchanged.
    %cast_2100 = tensor.cast %2120 : tensor<128xi8> to tensor<128xi8>
    %cast_2101 = tensor.cast %cast_2100 : tensor<128xi8> to tensor<128xi8>
    // Dequantize %cast_2101 (i8, 128 elements) back to f32 as (in - zero_point) * scale,
    // with scale = 7.8125e-03 (%2121) and zero_point = 0 (%2122).
    %2121 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2122 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2123 = torch.aten.item %2121 : !torch.vtensor<[],f32> -> !torch.float
    %2124 = torch_c.to_f64 %2123
    %2125 = torch.aten.item %2122 : !torch.vtensor<[],si8> -> !torch.int
    %2126 = torch_c.to_i64 %2125
    %cast_2102 = tensor.cast %cast_2101 : tensor<128xi8> to tensor<128xi8>
    %c1_2103 = arith.constant 1 : index
    %c0_2104 = arith.constant 0 : index
    %c128_2105 = arith.constant 128 : index
    %2127 = tensor.empty() : tensor<128xf32>
    %2128 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2102 : tensor<128xi8>) outs(%2127 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 and subtract the zero point (truncated from i64 to i32).
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2125
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // Convert to f32 and multiply by the scale (truncated from f64 to f32).
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2123
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    // %cast_2106 is broadcast along the channel dim by %2170 further down.
    %cast_2106 = tensor.cast %2128 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2080 (1x512x28x28) with
    // filter %cast_2095 (128x512x1x1); the output is pre-filled with %cast_2106
    // (128 elements) along the channel dim. Result is cast to 1x128x28x28.
    %int0_2107 = torch.constant.int 0
    %int0_2108 = torch.constant.int 0
    %int1_2109 = torch.constant.int 1
    %int1_2110 = torch.constant.int 1
    %int1_2111 = torch.constant.int 1
    %int1_2112 = torch.constant.int 1
    %int0_2113 = torch.constant.int 0
    // %2129 = [0, 0]: its elements feed the H/W pad amounts of tensor.pad below.
    // %2130 / %2131 = [1, 1] and %2132 = [0, 0] are presumably the remaining
    // convolution attribute lists (stride / dilation / output padding) - TODO confirm.
    %2129 = torch.prim.ListConstruct %int0_2107, %int0_2108 : (!torch.int, !torch.int) -> !torch.list<int>
    %2130 = torch.prim.ListConstruct %int1_2109, %int1_2110 : (!torch.int, !torch.int) -> !torch.list<int>
    %2131 = torch.prim.ListConstruct %int1_2111, %int1_2112 : (!torch.int, !torch.int) -> !torch.list<int>
    %2132 = torch.prim.ListConstruct %int0_2113, %int0_2113 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2114 = torch.constant.bool false
    // %int1_2115 is the group count: it is the divisor in the two "groups must
    // divide ..." assertions below.
    %int1_2115 = torch.constant.int 1
    %2133 = torch_c.to_i64 %int1_2115
    %2134 = torch_c.to_i64 %int0_2107
    %2135 = torch_c.to_i64 %int0_2108
    %2136 = torch_c.to_i64 %int0_2113
    %2137 = torch_c.to_i64 %int0_2113
    %c0_2116 = arith.constant 0 : index
    %c1_2117 = arith.constant 1 : index
    %c1_2118 = arith.constant 1 : index
    %c512_2119 = arith.constant 512 : index
    %c2_2120 = arith.constant 2 : index
    %c28_2121 = arith.constant 28 : index
    %c3_2122 = arith.constant 3 : index
    %c28_2123 = arith.constant 28 : index
    %c0_2124 = arith.constant 0 : index
    %c128_2125 = arith.constant 128 : index
    %c1_2126 = arith.constant 1 : index
    %c512_2127 = arith.constant 512 : index
    %c2_2128 = arith.constant 2 : index
    %c1_2129 = arith.constant 1 : index
    %c3_2130 = arith.constant 3 : index
    %c1_2131 = arith.constant 1 : index
    // Runtime checks: 512 rem groups == 0 and 128 rem groups == 0.
    %2138 = arith.index_cast %2133 : i64 to index
    %c0_2132 = arith.constant 0 : index
    %2139 = arith.remsi %c512_2119, %2138 : index
    %2140 = arith.cmpi eq, %c0_2132, %2139 : index
    cf.assert %2140, "invalid: groups must divide input channel size evenly."
    %c0_2133 = arith.constant 0 : index
    %2141 = arith.remsi %c128_2125, %2138 : index
    %2142 = arith.cmpi eq, %c0_2133, %2141 : index
    cf.assert %2142, "invalid: groups must divide weight batch size evenly."
    %c1_i64_2134 = arith.constant 1 : i64
    %c1_i64_2135 = arith.constant 1 : i64
    %c1_i64_2136 = arith.constant 1 : i64
    %c1_i64_2137 = arith.constant 1 : i64
    %cst_2138 = arith.constant 0.000000e+00 : f32
    %c0_2139 = arith.constant 0 : index
    %c1_2140 = arith.constant 1 : index
    %c1_2141 = arith.constant 1 : index
    %c512_2142 = arith.constant 512 : index
    %c2_2143 = arith.constant 2 : index
    %c28_2144 = arith.constant 28 : index
    %c3_2145 = arith.constant 3 : index
    %c28_2146 = arith.constant 28 : index
    %c0_i64_2147 = arith.constant 0 : i64
    // Pad with 0.0: amounts are 0 on dims 0/1 and %2134 / %2135 (both 0 here) on
    // dims 2/3, applied to low and high alike. The result type is fully dynamic.
    %2143 = arith.index_cast %c0_i64_2147 : i64 to index
    %2144 = arith.index_cast %c0_i64_2147 : i64 to index
    %2145 = arith.index_cast %2134 : i64 to index
    %2146 = arith.index_cast %2135 : i64 to index
    %padded_2148 = tensor.pad %cast_2080 low[%2143, %2144, %2145, %2146] high[%2143, %2144, %2145, %2146] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2138 : f32
    } : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((28 + 2 * %2134 - %c1_i64_2134 * (%c1_2129 - 1) - 1) floordiv %c1_i64_2136) + 1
    // %c1_i64_2134 / %c1_i64_2136 are presumably dilation / stride - TODO confirm.
    %2147 = arith.index_cast %c1_2129 : index to i64
    %c1_i64_2149 = arith.constant 1 : i64
    %c2_i64_2150 = arith.constant 2 : i64
    %2148 = arith.muli %2134, %c2_i64_2150 : i64
    %2149 = arith.index_cast %c28_2121 : index to i64
    %2150 = arith.addi %2149, %2148 : i64
    %2151 = arith.subi %2147, %c1_i64_2149 : i64
    %2152 = arith.muli %c1_i64_2134, %2151 : i64
    %2153 = arith.subi %2150, %2152 : i64
    %2154 = arith.subi %2153, %c1_i64_2149 : i64
    %2155 = arith.floordivsi %2154, %c1_i64_2136 : i64
    %2156 = arith.addi %2155, %c1_i64_2149 : i64
    %2157 = arith.index_cast %2156 : i64 to index
    // Same computation for dim 3, using %2135, %c1_2131, %c1_i64_2135, %c1_i64_2137.
    %2158 = arith.index_cast %c1_2131 : index to i64
    %c1_i64_2151 = arith.constant 1 : i64
    %c2_i64_2152 = arith.constant 2 : i64
    %2159 = arith.muli %2135, %c2_i64_2152 : i64
    %2160 = arith.index_cast %c28_2123 : index to i64
    %2161 = arith.addi %2160, %2159 : i64
    %2162 = arith.subi %2158, %c1_i64_2151 : i64
    %2163 = arith.muli %c1_i64_2135, %2162 : i64
    %2164 = arith.subi %2161, %2163 : i64
    %2165 = arith.subi %2164, %c1_i64_2151 : i64
    %2166 = arith.floordivsi %2165, %c1_i64_2137 : i64
    %2167 = arith.addi %2166, %c1_i64_2151 : i64
    %2168 = arith.index_cast %2167 : i64 to index
    %2169 = tensor.empty(%2157, %2168) : tensor<1x128x?x?xf32>
    // Broadcast %cast_2106 along dim 1 (#map3 -> d1) into the tensor that is
    // passed as the outs operand of the convolution.
    %2170 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2106 : tensor<128xf32>) outs(%2169 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // %2171 / %2172 are not referenced in the visible part of the file.
    %2171 = arith.floordivsi %c512_2119, %2138 : index
    %2172 = arith.floordivsi %c128_2125, %2138 : index
    %c0_2153 = arith.constant 0 : index
    %c1_2154 = arith.constant 1 : index
    %2173 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2148, %cast_2095 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%2170 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Re-establish the static 28x28 spatial shape.
    %cast_2155 = tensor.cast %2173 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // ReLU over %cast_2155 (f32, 1x128x28x28): out = (in > 0) ? in : 0.
    %c1_2156 = arith.constant 1 : index
    %c1_2157 = arith.constant 1 : index
    %c128_2158 = arith.constant 128 : index
    %c2_2159 = arith.constant 2 : index
    %c28_2160 = arith.constant 28 : index
    %c3_2161 = arith.constant 3 : index
    %c28_2162 = arith.constant 28 : index
    %2174 = tensor.empty() : tensor<1x128x28x28xf32>
    %2175 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2155 : tensor<1x128x28x28xf32>) outs(%2174 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is an unordered compare, so a NaN input is passed through unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_2163 = tensor.cast %2175 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the 1x128x28x28 f32 activation %cast_2163 to i8 with
    // scale 7.8125e-03 (= 2^-7) and zero point 0.
    %2176 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2177 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2164 = torch.constant.int 12
    %2178 = torch.aten.item %2176 : !torch.vtensor<[],f32> -> !torch.float
    // %2179 and %2181 are not referenced in the visible lines; the region below
    // converts %2178 / %2180 again itself.
    %2179 = torch_c.to_f64 %2178
    %2180 = torch.aten.item %2177 : !torch.vtensor<[],si8> -> !torch.int
    %2181 = torch_c.to_i64 %2180
    // The index constants below are not referenced in the visible lines.
    %c1_2165 = arith.constant 1 : index
    %c1_2166 = arith.constant 1 : index
    %c128_2167 = arith.constant 128 : index
    %c2_2168 = arith.constant 2 : index
    %c28_2169 = arith.constant 28 : index
    %c3_2170 = arith.constant 3 : index
    %c28_2171 = arith.constant 28 : index
    %2182 = tensor.empty() : tensor<1x128x28x28xi8>
    %2183 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2163 : tensor<1x128x28x28xf32>) outs(%2182 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2180
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2178
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8. Both
      // comparisons use unordered predicates, so a NaN value selects the upper
      // bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2172 = tensor.cast %2183 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_2173 = tensor.cast %cast_2172 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 activation %cast_2173 back to f32 using
    // scale 7.8125e-03 (= 2^-7) and zero point 0: x = (q - zero_point) * scale.
    %2184 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2185 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2186 = torch.aten.item %2184 : !torch.vtensor<[],f32> -> !torch.float
    // %2187 and %2189 are not referenced in the visible lines; the region below
    // converts %2186 / %2188 again itself.
    %2187 = torch_c.to_f64 %2186
    %2188 = torch.aten.item %2185 : !torch.vtensor<[],si8> -> !torch.int
    %2189 = torch_c.to_i64 %2188
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2174 = tensor.cast %cast_2173 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2175 = arith.constant 1 : index
    %c1_2176 = arith.constant 1 : index
    %c128_2177 = arith.constant 128 : index
    %c2_2178 = arith.constant 2 : index
    %c28_2179 = arith.constant 28 : index
    %c3_2180 = arith.constant 3 : index
    %c28_2181 = arith.constant 28 : index
    %2190 = tensor.empty() : tensor<1x128x28x28xf32>
    %2191 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2174 : tensor<1x128x28x28xi8>) outs(%2190 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2188
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2186
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2182 = tensor.cast %2191 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the 128x128x3x3 f32 tensor %78 (defined above this excerpt) to i8
    // with scale 3.90625e-03 (= 2^-8) and zero point 0. After dequantization the
    // result is the filter operand of the 3x3 convolution further down.
    %2192 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2193 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2183 = torch.constant.int 12
    %2194 = torch.aten.item %2192 : !torch.vtensor<[],f32> -> !torch.float
    // %2195 and %2197 are not referenced in the visible lines; the region below
    // converts %2194 / %2196 again itself.
    %2195 = torch_c.to_f64 %2194
    %2196 = torch.aten.item %2193 : !torch.vtensor<[],si8> -> !torch.int
    %2197 = torch_c.to_i64 %2196
    // The index constants below are not referenced in the visible lines.
    %c1_2184 = arith.constant 1 : index
    %c0_2185 = arith.constant 0 : index
    %c128_2186 = arith.constant 128 : index
    %c1_2187 = arith.constant 1 : index
    %c128_2188 = arith.constant 128 : index
    %c2_2189 = arith.constant 2 : index
    %c3_2190 = arith.constant 3 : index
    %c3_2191 = arith.constant 3 : index
    %c3_2192 = arith.constant 3 : index
    %2198 = tensor.empty() : tensor<128x128x3x3xi8>
    %2199 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%78 : tensor<128x128x3x3xf32>) outs(%2198 : tensor<128x128x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2196
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2194
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x128x3x3xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2193 = tensor.cast %2199 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %cast_2194 = tensor.cast %cast_2193 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // Dequantize the i8 128x128x3x3 tensor %cast_2194 back to f32 using
    // scale 3.90625e-03 (= 2^-8) and zero point 0: x = (q - zero_point) * scale.
    %2200 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2201 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2202 = torch.aten.item %2200 : !torch.vtensor<[],f32> -> !torch.float
    // %2203 and %2205 are not referenced in the visible lines; the region below
    // converts %2202 / %2204 again itself.
    %2203 = torch_c.to_f64 %2202
    %2204 = torch.aten.item %2201 : !torch.vtensor<[],si8> -> !torch.int
    %2205 = torch_c.to_i64 %2204
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2195 = tensor.cast %cast_2194 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2196 = arith.constant 1 : index
    %c0_2197 = arith.constant 0 : index
    %c128_2198 = arith.constant 128 : index
    %c1_2199 = arith.constant 1 : index
    %c128_2200 = arith.constant 128 : index
    %c2_2201 = arith.constant 2 : index
    %c3_2202 = arith.constant 3 : index
    %c3_2203 = arith.constant 3 : index
    %c3_2204 = arith.constant 3 : index
    %2206 = tensor.empty() : tensor<128x128x3x3xf32>
    %2207 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2195 : tensor<128x128x3x3xi8>) outs(%2206 : tensor<128x128x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2204
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2202
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x128x3x3xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2205 = tensor.cast %2207 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
    // Quantize the 128-element f32 vector %80 (defined above this excerpt) to i8
    // with scale 7.8125e-03 (= 2^-7) and zero point 0. After dequantization it is
    // broadcast along the channel dim to seed the 3x3 convolution accumulator.
    %2208 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2209 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2206 = torch.constant.int 12
    %2210 = torch.aten.item %2208 : !torch.vtensor<[],f32> -> !torch.float
    // %2211 and %2213 are not referenced in the visible lines; the region below
    // converts %2210 / %2212 again itself.
    %2211 = torch_c.to_f64 %2210
    %2212 = torch.aten.item %2209 : !torch.vtensor<[],si8> -> !torch.int
    %2213 = torch_c.to_i64 %2212
    // The index constants below are not referenced in the visible lines.
    %c1_2207 = arith.constant 1 : index
    %c0_2208 = arith.constant 0 : index
    %c128_2209 = arith.constant 128 : index
    %2214 = tensor.empty() : tensor<128xi8>
    %2215 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%80 : tensor<128xf32>) outs(%2214 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2212
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2210
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2210 = tensor.cast %2215 : tensor<128xi8> to tensor<128xi8>
    %cast_2211 = tensor.cast %cast_2210 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 128-element vector %cast_2211 back to f32 using
    // scale 7.8125e-03 (= 2^-7) and zero point 0: x = (q - zero_point) * scale.
    %2216 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2217 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2218 = torch.aten.item %2216 : !torch.vtensor<[],f32> -> !torch.float
    // %2219 and %2221 are not referenced in the visible lines; the region below
    // converts %2218 / %2220 again itself.
    %2219 = torch_c.to_f64 %2218
    %2220 = torch.aten.item %2217 : !torch.vtensor<[],si8> -> !torch.int
    %2221 = torch_c.to_i64 %2220
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2212 = tensor.cast %cast_2211 : tensor<128xi8> to tensor<128xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2213 = arith.constant 1 : index
    %c0_2214 = arith.constant 0 : index
    %c128_2215 = arith.constant 128 : index
    %2222 = tensor.empty() : tensor<128xf32>
    %2223 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2212 : tensor<128xi8>) outs(%2222 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2220
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2218
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2216 = tensor.cast %2223 : tensor<128xf32> to tensor<128xf32>
    // 3x3 convolution, 128 -> 128 channels, on the dequantized 1x128x28x28
    // activation %cast_2182 with filter %cast_2205 and bias %cast_2216.
    // Spatial padding is 1 on each side, so the 28x28 extent is preserved.
    %int1_2217 = torch.constant.int 1
    %int1_2218 = torch.constant.int 1
    %int1_2219 = torch.constant.int 1
    %int1_2220 = torch.constant.int 1
    %int1_2221 = torch.constant.int 1
    %int1_2222 = torch.constant.int 1
    %int0_2223 = torch.constant.int 0
    // Leftover torch-level argument lists; the lists themselves and %false_2224
    // are not referenced in the visible lines. Only the scalar ints feeding them
    // are used below.
    %2224 = torch.prim.ListConstruct %int1_2217, %int1_2218 : (!torch.int, !torch.int) -> !torch.list<int>
    %2225 = torch.prim.ListConstruct %int1_2219, %int1_2220 : (!torch.int, !torch.int) -> !torch.list<int>
    %2226 = torch.prim.ListConstruct %int1_2221, %int1_2222 : (!torch.int, !torch.int) -> !torch.list<int>
    %2227 = torch.prim.ListConstruct %int0_2223, %int0_2223 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2224 = torch.constant.bool false
    %int1_2225 = torch.constant.int 1
    // %2228 is the group count checked by the asserts below (1). %2229/%2230 are
    // the per-dimension padding amounts consumed by tensor.pad (both 1).
    // %2231/%2232 are not referenced in the visible lines.
    %2228 = torch_c.to_i64 %int1_2225
    %2229 = torch_c.to_i64 %int1_2217
    %2230 = torch_c.to_i64 %int1_2218
    %2231 = torch_c.to_i64 %int0_2223
    %2232 = torch_c.to_i64 %int0_2223
    // Static extents emitted by the lowering; only some are referenced below
    // (%c128_2229, %c128_2235, %c28_2231, %c28_2233, %c3_2239, %c3_2241).
    %c0_2226 = arith.constant 0 : index
    %c1_2227 = arith.constant 1 : index
    %c1_2228 = arith.constant 1 : index
    %c128_2229 = arith.constant 128 : index
    %c2_2230 = arith.constant 2 : index
    %c28_2231 = arith.constant 28 : index
    %c3_2232 = arith.constant 3 : index
    %c28_2233 = arith.constant 28 : index
    %c0_2234 = arith.constant 0 : index
    %c128_2235 = arith.constant 128 : index
    %c1_2236 = arith.constant 1 : index
    %c128_2237 = arith.constant 128 : index
    %c2_2238 = arith.constant 2 : index
    %c3_2239 = arith.constant 3 : index
    %c3_2240 = arith.constant 3 : index
    %c3_2241 = arith.constant 3 : index
    // Runtime checks that the group count divides both 128-sized channel extents.
    %2233 = arith.index_cast %2228 : i64 to index
    %c0_2242 = arith.constant 0 : index
    %2234 = arith.remsi %c128_2229, %2233 : index
    %2235 = arith.cmpi eq, %c0_2242, %2234 : index
    cf.assert %2235, "invalid: groups must divide input channel size evenly."
    %c0_2243 = arith.constant 0 : index
    %2236 = arith.remsi %c128_2235, %2233 : index
    %2237 = arith.cmpi eq, %c0_2243, %2236 : index
    cf.assert %2237, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are the multiplier (%c1_i64_2244/2245) and the divisor
    // (%c1_i64_2246/2247) in the output-extent arithmetic below.
    // NOTE(review): presumably dilation and stride respectively — confirm.
    %c1_i64_2244 = arith.constant 1 : i64
    %c1_i64_2245 = arith.constant 1 : i64
    %c1_i64_2246 = arith.constant 1 : i64
    %c1_i64_2247 = arith.constant 1 : i64
    // Padding fill value.
    %cst_2248 = arith.constant 0.000000e+00 : f32
    // The index constants below are not referenced in the visible lines.
    %c0_2249 = arith.constant 0 : index
    %c1_2250 = arith.constant 1 : index
    %c1_2251 = arith.constant 1 : index
    %c128_2252 = arith.constant 128 : index
    %c2_2253 = arith.constant 2 : index
    %c28_2254 = arith.constant 28 : index
    %c3_2255 = arith.constant 3 : index
    %c28_2256 = arith.constant 28 : index
    // Zero-pad the input: no padding on dims 0 and 1, %2240/%2241 (= 1) on both
    // the low and high side of dims 2 and 3. The result type is fully dynamic.
    %c0_i64_2257 = arith.constant 0 : i64
    %2238 = arith.index_cast %c0_i64_2257 : i64 to index
    %2239 = arith.index_cast %c0_i64_2257 : i64 to index
    %2240 = arith.index_cast %2229 : i64 to index
    %2241 = arith.index_cast %2230 : i64 to index
    %padded_2258 = tensor.pad %cast_2182 low[%2238, %2239, %2240, %2241] high[%2238, %2239, %2240, %2241] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2248 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((28 + 2*1) - 1*(3 - 1) - 1) floordiv 1 + 1 = 28
    %2242 = arith.index_cast %c3_2239 : index to i64
    %c1_i64_2259 = arith.constant 1 : i64
    %c2_i64_2260 = arith.constant 2 : i64
    %2243 = arith.muli %2229, %c2_i64_2260 : i64
    %2244 = arith.index_cast %c28_2231 : index to i64
    %2245 = arith.addi %2244, %2243 : i64
    %2246 = arith.subi %2242, %c1_i64_2259 : i64
    %2247 = arith.muli %c1_i64_2244, %2246 : i64
    %2248 = arith.subi %2245, %2247 : i64
    %2249 = arith.subi %2248, %c1_i64_2259 : i64
    %2250 = arith.floordivsi %2249, %c1_i64_2246 : i64
    %2251 = arith.addi %2250, %c1_i64_2259 : i64
    %2252 = arith.index_cast %2251 : i64 to index
    // Output extent for dim 3, same arithmetic with %2230 / %c3_2241 / %c28_2233.
    %2253 = arith.index_cast %c3_2241 : index to i64
    %c1_i64_2261 = arith.constant 1 : i64
    %c2_i64_2262 = arith.constant 2 : i64
    %2254 = arith.muli %2230, %c2_i64_2262 : i64
    %2255 = arith.index_cast %c28_2233 : index to i64
    %2256 = arith.addi %2255, %2254 : i64
    %2257 = arith.subi %2253, %c1_i64_2261 : i64
    %2258 = arith.muli %c1_i64_2245, %2257 : i64
    %2259 = arith.subi %2256, %2258 : i64
    %2260 = arith.subi %2259, %c1_i64_2261 : i64
    %2261 = arith.floordivsi %2260, %c1_i64_2247 : i64
    %2262 = arith.addi %2261, %c1_i64_2261 : i64
    %2263 = arith.index_cast %2262 : i64 to index
    // Allocate the 1x128x?x? accumulator and fill it by broadcasting the bias
    // vector along d1 (#map3).
    %2264 = tensor.empty(%2252, %2263) : tensor<1x128x?x?xf32>
    %2265 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2216 : tensor<128xf32>) outs(%2264 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // %2266/%2267 and the two index constants below are not referenced in the
    // visible lines.
    %2266 = arith.floordivsi %c128_2229, %2233 : index
    %2267 = arith.floordivsi %c128_2235, %2233 : index
    %c0_2263 = arith.constant 0 : index
    %c1_2264 = arith.constant 1 : index
    // Stride-1, dilation-1 NCHW/FCHW convolution. The bias-filled tensor is the
    // `outs` operand, so the convolution result is accumulated on top of the bias.
    %2268 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2258, %cast_2205 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%2265 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Pin the dynamic spatial extents to the static 28x28 shape.
    %cast_2265 = tensor.cast %2268 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // Elementwise max(x, 0) over the 1x128x28x28 result of the 3x3 convolution.
    // The index constants below are not referenced in the visible lines.
    %c1_2266 = arith.constant 1 : index
    %c1_2267 = arith.constant 1 : index
    %c128_2268 = arith.constant 128 : index
    %c2_2269 = arith.constant 2 : index
    %c28_2270 = arith.constant 28 : index
    %c3_2271 = arith.constant 3 : index
    %c28_2272 = arith.constant 28 : index
    %2269 = tensor.empty() : tensor<1x128x28x28xf32>
    // Input is read through #map, which indexes the size-1 leading dim with 0.
    %2270 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2265 : tensor<1x128x28x28xf32>) outs(%2269 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered-or-greater predicate, so a NaN input compares
      // true and is passed through unchanged rather than replaced by 0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2273 = tensor.cast %2270 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the 1x128x28x28 f32 activation %cast_2273 to i8 with
    // scale 1.5625e-02 (= 2^-6) and zero point 0.
    %2271 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2272 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2274 = torch.constant.int 12
    %2273 = torch.aten.item %2271 : !torch.vtensor<[],f32> -> !torch.float
    // %2274 and %2276 are not referenced in the visible lines; the region below
    // converts %2273 / %2275 again itself.
    %2274 = torch_c.to_f64 %2273
    %2275 = torch.aten.item %2272 : !torch.vtensor<[],si8> -> !torch.int
    %2276 = torch_c.to_i64 %2275
    // The index constants below are not referenced in the visible lines.
    %c1_2275 = arith.constant 1 : index
    %c1_2276 = arith.constant 1 : index
    %c128_2277 = arith.constant 128 : index
    %c2_2278 = arith.constant 2 : index
    %c28_2279 = arith.constant 28 : index
    %c3_2280 = arith.constant 3 : index
    %c28_2281 = arith.constant 28 : index
    %2277 = tensor.empty() : tensor<1x128x28x28xi8>
    %2278 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2273 : tensor<1x128x28x28xf32>) outs(%2277 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2275
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2273
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2282 = tensor.cast %2278 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_2283 = tensor.cast %cast_2282 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 activation %cast_2283 back to f32 using
    // scale 1.5625e-02 (= 2^-6) and zero point 0: x = (q - zero_point) * scale.
    %2279 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2280 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2281 = torch.aten.item %2279 : !torch.vtensor<[],f32> -> !torch.float
    // %2282 and %2284 are not referenced in the visible lines; the region below
    // converts %2281 / %2283 again itself.
    %2282 = torch_c.to_f64 %2281
    %2283 = torch.aten.item %2280 : !torch.vtensor<[],si8> -> !torch.int
    %2284 = torch_c.to_i64 %2283
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2284 = tensor.cast %cast_2283 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2285 = arith.constant 1 : index
    %c1_2286 = arith.constant 1 : index
    %c128_2287 = arith.constant 128 : index
    %c2_2288 = arith.constant 2 : index
    %c28_2289 = arith.constant 28 : index
    %c3_2290 = arith.constant 3 : index
    %c28_2291 = arith.constant 28 : index
    %2285 = tensor.empty() : tensor<1x128x28x28xf32>
    %2286 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2284 : tensor<1x128x28x28xi8>) outs(%2285 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2283
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2281
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2292 = tensor.cast %2286 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the 512x128x1x1 f32 tensor %82 (defined above this excerpt) to i8
    // with scale 3.90625e-03 (= 2^-8) and zero point 0. After dequantization the
    // result is the filter operand of the 1x1 convolution further down.
    %2287 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2288 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2293 = torch.constant.int 12
    %2289 = torch.aten.item %2287 : !torch.vtensor<[],f32> -> !torch.float
    // %2290 and %2292 are not referenced in the visible lines; the region below
    // converts %2289 / %2291 again itself.
    %2290 = torch_c.to_f64 %2289
    %2291 = torch.aten.item %2288 : !torch.vtensor<[],si8> -> !torch.int
    %2292 = torch_c.to_i64 %2291
    // The index constants below are not referenced in the visible lines.
    %c1_2294 = arith.constant 1 : index
    %c0_2295 = arith.constant 0 : index
    %c512_2296 = arith.constant 512 : index
    %c1_2297 = arith.constant 1 : index
    %c128_2298 = arith.constant 128 : index
    %2293 = tensor.empty() : tensor<512x128x1x1xi8>
    // Input is read through #map4, which indexes the two size-1 trailing dims
    // with the constant 0.
    %2294 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%82 : tensor<512x128x1x1xf32>) outs(%2293 : tensor<512x128x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2291
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2289
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x128x1x1xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2299 = tensor.cast %2294 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %cast_2300 = tensor.cast %cast_2299 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    // Dequantize the i8 512x128x1x1 tensor %cast_2300 back to f32 using
    // scale 3.90625e-03 (= 2^-8) and zero point 0: x = (q - zero_point) * scale.
    %2295 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2296 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2297 = torch.aten.item %2295 : !torch.vtensor<[],f32> -> !torch.float
    // %2298 and %2300 are not referenced in the visible lines; the region below
    // converts %2297 / %2299 again itself.
    %2298 = torch_c.to_f64 %2297
    %2299 = torch.aten.item %2296 : !torch.vtensor<[],si8> -> !torch.int
    %2300 = torch_c.to_i64 %2299
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2301 = tensor.cast %cast_2300 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2302 = arith.constant 1 : index
    %c0_2303 = arith.constant 0 : index
    %c512_2304 = arith.constant 512 : index
    %c1_2305 = arith.constant 1 : index
    %c128_2306 = arith.constant 128 : index
    %2301 = tensor.empty() : tensor<512x128x1x1xf32>
    // Input is read through #map4, which indexes the two size-1 trailing dims
    // with the constant 0.
    %2302 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2301 : tensor<512x128x1x1xi8>) outs(%2301 : tensor<512x128x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2299
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2297
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x128x1x1xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2307 = tensor.cast %2302 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
    // Quantize the 512-element f32 vector %84 (defined above this excerpt) to i8
    // with scale 7.8125e-03 (= 2^-7) and zero point 0. After dequantization it is
    // broadcast along the channel dim to seed the 1x1 convolution accumulator.
    %2303 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2304 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2308 = torch.constant.int 12
    %2305 = torch.aten.item %2303 : !torch.vtensor<[],f32> -> !torch.float
    // %2306 and %2308 are not referenced in the visible lines; the region below
    // converts %2305 / %2307 again itself.
    %2306 = torch_c.to_f64 %2305
    %2307 = torch.aten.item %2304 : !torch.vtensor<[],si8> -> !torch.int
    %2308 = torch_c.to_i64 %2307
    // The index constants below are not referenced in the visible lines.
    %c1_2309 = arith.constant 1 : index
    %c0_2310 = arith.constant 0 : index
    %c512_2311 = arith.constant 512 : index
    %2309 = tensor.empty() : tensor<512xi8>
    %2310 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%84 : tensor<512xf32>) outs(%2309 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2307
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2305
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2312 = tensor.cast %2310 : tensor<512xi8> to tensor<512xi8>
    %cast_2313 = tensor.cast %cast_2312 : tensor<512xi8> to tensor<512xi8>
    // Dequantize the i8 512-element vector %cast_2313 back to f32 using
    // scale 7.8125e-03 (= 2^-7) and zero point 0: x = (q - zero_point) * scale.
    %2311 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2312 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2313 = torch.aten.item %2311 : !torch.vtensor<[],f32> -> !torch.float
    // %2314 and %2316 are not referenced in the visible lines; the region below
    // converts %2313 / %2315 again itself.
    %2314 = torch_c.to_f64 %2313
    %2315 = torch.aten.item %2312 : !torch.vtensor<[],si8> -> !torch.int
    %2316 = torch_c.to_i64 %2315
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2314 = tensor.cast %cast_2313 : tensor<512xi8> to tensor<512xi8>
    // The index constants below are not referenced in the visible lines.
    %c1_2315 = arith.constant 1 : index
    %c0_2316 = arith.constant 0 : index
    %c512_2317 = arith.constant 512 : index
    %2317 = tensor.empty() : tensor<512xf32>
    %2318 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2314 : tensor<512xi8>) outs(%2317 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2315
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is narrowed from f64 to f32 before the multiply.
      %5779 = torch_c.to_f64 %2313
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // Same-type cast; a no-op left behind by the lowering.
    %cast_2318 = tensor.cast %2318 : tensor<512xf32> to tensor<512xf32>
    // 1x1 convolution, 128 -> 512 channels, on the dequantized 1x128x28x28
    // activation %cast_2292 with filter %cast_2307 and bias %cast_2318.
    // Spatial padding is 0, so the 28x28 extent is preserved.
    %int0_2319 = torch.constant.int 0
    %int0_2320 = torch.constant.int 0
    %int1_2321 = torch.constant.int 1
    %int1_2322 = torch.constant.int 1
    %int1_2323 = torch.constant.int 1
    %int1_2324 = torch.constant.int 1
    %int0_2325 = torch.constant.int 0
    // Leftover torch-level argument lists; the lists themselves and %false_2326
    // are not referenced in the visible lines. Only the scalar ints feeding them
    // are used below.
    %2319 = torch.prim.ListConstruct %int0_2319, %int0_2320 : (!torch.int, !torch.int) -> !torch.list<int>
    %2320 = torch.prim.ListConstruct %int1_2321, %int1_2322 : (!torch.int, !torch.int) -> !torch.list<int>
    %2321 = torch.prim.ListConstruct %int1_2323, %int1_2324 : (!torch.int, !torch.int) -> !torch.list<int>
    %2322 = torch.prim.ListConstruct %int0_2325, %int0_2325 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2326 = torch.constant.bool false
    %int1_2327 = torch.constant.int 1
    // %2323 is the group count checked by the asserts below (1). %2324/%2325 are
    // the per-dimension padding amounts consumed by tensor.pad (both 0).
    // %2326/%2327 are not referenced in the visible lines.
    %2323 = torch_c.to_i64 %int1_2327
    %2324 = torch_c.to_i64 %int0_2319
    %2325 = torch_c.to_i64 %int0_2320
    %2326 = torch_c.to_i64 %int0_2325
    %2327 = torch_c.to_i64 %int0_2325
    // Static extents emitted by the lowering; only some are referenced below
    // (%c128_2331, %c512_2337, %c28_2333, %c28_2335, %c1_2341, %c1_2343).
    %c0_2328 = arith.constant 0 : index
    %c1_2329 = arith.constant 1 : index
    %c1_2330 = arith.constant 1 : index
    %c128_2331 = arith.constant 128 : index
    %c2_2332 = arith.constant 2 : index
    %c28_2333 = arith.constant 28 : index
    %c3_2334 = arith.constant 3 : index
    %c28_2335 = arith.constant 28 : index
    %c0_2336 = arith.constant 0 : index
    %c512_2337 = arith.constant 512 : index
    %c1_2338 = arith.constant 1 : index
    %c128_2339 = arith.constant 128 : index
    %c2_2340 = arith.constant 2 : index
    %c1_2341 = arith.constant 1 : index
    %c3_2342 = arith.constant 3 : index
    %c1_2343 = arith.constant 1 : index
    // Runtime checks that the group count divides the 128 input-channel extent
    // and the 512 filter-count extent.
    %2328 = arith.index_cast %2323 : i64 to index
    %c0_2344 = arith.constant 0 : index
    %2329 = arith.remsi %c128_2331, %2328 : index
    %2330 = arith.cmpi eq, %c0_2344, %2329 : index
    cf.assert %2330, "invalid: groups must divide input channel size evenly."
    %c0_2345 = arith.constant 0 : index
    %2331 = arith.remsi %c512_2337, %2328 : index
    %2332 = arith.cmpi eq, %c0_2345, %2331 : index
    cf.assert %2332, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are the multiplier (%c1_i64_2346/2347) and the divisor
    // (%c1_i64_2348/2349) in the output-extent arithmetic below.
    // NOTE(review): presumably dilation and stride respectively — confirm.
    %c1_i64_2346 = arith.constant 1 : i64
    %c1_i64_2347 = arith.constant 1 : i64
    %c1_i64_2348 = arith.constant 1 : i64
    %c1_i64_2349 = arith.constant 1 : i64
    // Padding fill value.
    %cst_2350 = arith.constant 0.000000e+00 : f32
    // The index constants below are not referenced in the visible lines.
    %c0_2351 = arith.constant 0 : index
    %c1_2352 = arith.constant 1 : index
    %c1_2353 = arith.constant 1 : index
    %c128_2354 = arith.constant 128 : index
    %c2_2355 = arith.constant 2 : index
    %c28_2356 = arith.constant 28 : index
    %c3_2357 = arith.constant 3 : index
    %c28_2358 = arith.constant 28 : index
    // tensor.pad with all-zero low/high amounts: no elements are added, but the
    // result type becomes fully dynamic.
    %c0_i64_2359 = arith.constant 0 : i64
    %2333 = arith.index_cast %c0_i64_2359 : i64 to index
    %2334 = arith.index_cast %c0_i64_2359 : i64 to index
    %2335 = arith.index_cast %2324 : i64 to index
    %2336 = arith.index_cast %2325 : i64 to index
    %padded_2360 = tensor.pad %cast_2292 low[%2333, %2334, %2335, %2336] high[%2333, %2334, %2335, %2336] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2350 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((28 + 2*0) - 1*(1 - 1) - 1) floordiv 1 + 1 = 28
    %2337 = arith.index_cast %c1_2341 : index to i64
    %c1_i64_2361 = arith.constant 1 : i64
    %c2_i64_2362 = arith.constant 2 : i64
    %2338 = arith.muli %2324, %c2_i64_2362 : i64
    %2339 = arith.index_cast %c28_2333 : index to i64
    %2340 = arith.addi %2339, %2338 : i64
    %2341 = arith.subi %2337, %c1_i64_2361 : i64
    %2342 = arith.muli %c1_i64_2346, %2341 : i64
    %2343 = arith.subi %2340, %2342 : i64
    %2344 = arith.subi %2343, %c1_i64_2361 : i64
    %2345 = arith.floordivsi %2344, %c1_i64_2348 : i64
    %2346 = arith.addi %2345, %c1_i64_2361 : i64
    %2347 = arith.index_cast %2346 : i64 to index
    // Output extent for dim 3, same arithmetic with %2325 / %c1_2343 / %c28_2335.
    %2348 = arith.index_cast %c1_2343 : index to i64
    %c1_i64_2363 = arith.constant 1 : i64
    %c2_i64_2364 = arith.constant 2 : i64
    %2349 = arith.muli %2325, %c2_i64_2364 : i64
    %2350 = arith.index_cast %c28_2335 : index to i64
    %2351 = arith.addi %2350, %2349 : i64
    %2352 = arith.subi %2348, %c1_i64_2363 : i64
    %2353 = arith.muli %c1_i64_2347, %2352 : i64
    %2354 = arith.subi %2351, %2353 : i64
    %2355 = arith.subi %2354, %c1_i64_2363 : i64
    %2356 = arith.floordivsi %2355, %c1_i64_2349 : i64
    %2357 = arith.addi %2356, %c1_i64_2363 : i64
    %2358 = arith.index_cast %2357 : i64 to index
    // Allocate the 1x512x?x? accumulator and fill it by broadcasting the bias
    // vector along d1 (#map3).
    %2359 = tensor.empty(%2347, %2358) : tensor<1x512x?x?xf32>
    %2360 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2318 : tensor<512xf32>) outs(%2359 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // %2361/%2362 and the two index constants below are not referenced in the
    // visible lines.
    %2361 = arith.floordivsi %c128_2331, %2328 : index
    %2362 = arith.floordivsi %c512_2337, %2328 : index
    %c0_2365 = arith.constant 0 : index
    %c1_2366 = arith.constant 1 : index
    // Stride-1, dilation-1 NCHW/FCHW convolution. The bias-filled tensor is the
    // `outs` operand, so the convolution result is accumulated on top of the bias.
    %2363 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2360, %cast_2307 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2360 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Pin the dynamic spatial extents to the static 28x28 shape.
    %cast_2367 = tensor.cast %2363 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
    // Quantize the 1x512x28x28 f32 convolution result %cast_2367 to i8 with
    // scale 7.8125e-03 (= 2^-7) and zero point 0.
    %2364 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2365 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; presumably a torch dtype code for the
    // quantized element type — confirm.
    %int12_2368 = torch.constant.int 12
    %2366 = torch.aten.item %2364 : !torch.vtensor<[],f32> -> !torch.float
    // %2367 and %2369 are not referenced in the visible lines; the region below
    // converts %2366 / %2368 again itself.
    %2367 = torch_c.to_f64 %2366
    %2368 = torch.aten.item %2365 : !torch.vtensor<[],si8> -> !torch.int
    %2369 = torch_c.to_i64 %2368
    // The index constants below are not referenced in the visible lines.
    %c1_2369 = arith.constant 1 : index
    %c1_2370 = arith.constant 1 : index
    %c512_2371 = arith.constant 512 : index
    %c2_2372 = arith.constant 2 : index
    %c28_2373 = arith.constant 28 : index
    %c3_2374 = arith.constant 3 : index
    %c28_2375 = arith.constant 28 : index
    %2370 = tensor.empty() : tensor<1x512x28x28xi8>
    %2371 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2367 : tensor<1x512x28x28xf32>) outs(%2370 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %2368
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2366
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(x / scale) + zero_point
      // NOTE(review): math.round rounds halfway cases away from zero. If
      // round-half-to-even is required (as the ONNX QuantizeLinear spec states),
      // math.roundeven would be needed — confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the signed conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Two same-type casts; no-ops left behind by the lowering.
    %cast_2376 = tensor.cast %2371 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2377 = tensor.cast %cast_2376 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize the i8 tensor %cast_2377 back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 7.8125e-03 (%2372) and zero_point = 0 (%2373).
    %2372 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2373 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2374 = torch.aten.item %2372 : !torch.vtensor<[],f32> -> !torch.float
    %2375 = torch_c.to_f64 %2374
    %2376 = torch.aten.item %2373 : !torch.vtensor<[],si8> -> !torch.int
    %2377 = torch_c.to_i64 %2376
    %cast_2378 = tensor.cast %cast_2377 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2379 = arith.constant 1 : index
    %c1_2380 = arith.constant 1 : index
    %c512_2381 = arith.constant 512 : index
    %c2_2382 = arith.constant 2 : index
    %c28_2383 = arith.constant 28 : index
    %c3_2384 = arith.constant 3 : index
    %c28_2385 = arith.constant 28 : index
    %2378 = tensor.empty() : tensor<1x512x28x28xf32>
    %2379 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2378 : tensor<1x512x28x28xi8>) outs(%2378 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2376
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2374
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2386 = tensor.cast %2379 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Elementwise add with alpha: result = %cast_2386 + alpha * %cast_2080,
    // where alpha = 1 comes from %int1_2387.
    // NOTE(review): presumably the ResNet skip connection; %cast_2080 is defined
    // earlier in the function — confirm.
    %int1_2387 = torch.constant.int 1
    %2380 = torch_c.to_i64 %int1_2387
    %c1_2388 = arith.constant 1 : index
    %c1_2389 = arith.constant 1 : index
    %c512_2390 = arith.constant 512 : index
    %c2_2391 = arith.constant 2 : index
    %c28_2392 = arith.constant 28 : index
    %c3_2393 = arith.constant 3 : index
    %c28_2394 = arith.constant 28 : index
    %c1_2395 = arith.constant 1 : index
    %c512_2396 = arith.constant 512 : index
    // Broadcast checks: each assert compares two constants of equal value
    // (512 vs 512, 28 vs 28, 28 vs 28).
    %2381 = arith.cmpi eq, %c512_2390, %c512_2396 : index
    cf.assert %2381, "mismatched size for broadcast"
    %c2_2397 = arith.constant 2 : index
    %c28_2398 = arith.constant 28 : index
    %2382 = arith.cmpi eq, %c28_2392, %c28_2398 : index
    cf.assert %2382, "mismatched size for broadcast"
    %c3_2399 = arith.constant 3 : index
    %c28_2400 = arith.constant 28 : index
    %2383 = arith.cmpi eq, %c28_2394, %c28_2400 : index
    cf.assert %2383, "mismatched size for broadcast"
    %2384 = tensor.empty() : tensor<1x512x28x28xf32>
    %2385 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2386, %cast_2080 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2384 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // alpha (i64 1) is converted to f32 and scales the second operand.
      %5774 = arith.sitofp %2380 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2401 = tensor.cast %2385 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // ReLU on %cast_2401: each element becomes itself if it compares
    // greater than 0.0, otherwise 0.0.
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2402 = arith.constant 1 : index
    %c1_2403 = arith.constant 1 : index
    %c512_2404 = arith.constant 512 : index
    %c2_2405 = arith.constant 2 : index
    %c28_2406 = arith.constant 28 : index
    %c3_2407 = arith.constant 3 : index
    %c28_2408 = arith.constant 28 : index
    %2386 = tensor.empty() : tensor<1x512x28x28xf32>
    %2387 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2401 : tensor<1x512x28x28xf32>) outs(%2386 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate: it is also true when %in is NaN,
      // so a NaN input is passed through rather than replaced by 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2409 = tensor.cast %2387 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize the f32 ReLU output %cast_2409 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (%2388) and zero_point = 0 (%2389).
    %2388 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2389 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2410 = torch.constant.int 12
    %2390 = torch.aten.item %2388 : !torch.vtensor<[],f32> -> !torch.float
    %2391 = torch_c.to_f64 %2390
    %2392 = torch.aten.item %2389 : !torch.vtensor<[],si8> -> !torch.int
    %2393 = torch_c.to_i64 %2392
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2411 = arith.constant 1 : index
    %c1_2412 = arith.constant 1 : index
    %c512_2413 = arith.constant 512 : index
    %c2_2414 = arith.constant 2 : index
    %c28_2415 = arith.constant 28 : index
    %c3_2416 = arith.constant 3 : index
    %c28_2417 = arith.constant 28 : index
    %2394 = tensor.empty() : tensor<1x512x28x28xi8>
    %2395 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2409 : tensor<1x512x28x28xf32>) outs(%2394 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2392
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2390
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // Both casts keep the same static type.
    %cast_2418 = tensor.cast %2395 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2419 = tensor.cast %cast_2418 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize the i8 tensor %cast_2419 back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 1.5625e-02 (%2396) and zero_point = 0 (%2397).
    %2396 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2397 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2398 = torch.aten.item %2396 : !torch.vtensor<[],f32> -> !torch.float
    %2399 = torch_c.to_f64 %2398
    %2400 = torch.aten.item %2397 : !torch.vtensor<[],si8> -> !torch.int
    %2401 = torch_c.to_i64 %2400
    %cast_2420 = tensor.cast %cast_2419 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2421 = arith.constant 1 : index
    %c1_2422 = arith.constant 1 : index
    %c512_2423 = arith.constant 512 : index
    %c2_2424 = arith.constant 2 : index
    %c28_2425 = arith.constant 28 : index
    %c3_2426 = arith.constant 3 : index
    %c28_2427 = arith.constant 28 : index
    %2402 = tensor.empty() : tensor<1x512x28x28xf32>
    %2403 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2420 : tensor<1x512x28x28xi8>) outs(%2402 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2400
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2398
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2428 = tensor.cast %2403 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize the f32 tensor %86 (128x512x1x1) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 0.001953125 (%2404) and zero_point = 0 (%2405).
    // After dequantization this tensor is the filter operand of the
    // linalg.conv_2d_nchw_fchw op that produces %2480.
    %2404 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %2405 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2429 = torch.constant.int 12
    %2406 = torch.aten.item %2404 : !torch.vtensor<[],f32> -> !torch.float
    %2407 = torch_c.to_f64 %2406
    %2408 = torch.aten.item %2405 : !torch.vtensor<[],si8> -> !torch.int
    %2409 = torch_c.to_i64 %2408
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2430 = arith.constant 1 : index
    %c0_2431 = arith.constant 0 : index
    %c128_2432 = arith.constant 128 : index
    %c1_2433 = arith.constant 1 : index
    %c512_2434 = arith.constant 512 : index
    %2410 = tensor.empty() : tensor<128x512x1x1xi8>
    %2411 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%86 : tensor<128x512x1x1xf32>) outs(%2410 : tensor<128x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2408
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2406
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x512x1x1xi8>
    // Both casts keep the same static type.
    %cast_2435 = tensor.cast %2411 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    %cast_2436 = tensor.cast %cast_2435 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    // Dequantize the i8 tensor %cast_2436 (128x512x1x1) back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 0.001953125 (%2412) and zero_point = 0 (%2413).
    %2412 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %2413 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2414 = torch.aten.item %2412 : !torch.vtensor<[],f32> -> !torch.float
    %2415 = torch_c.to_f64 %2414
    %2416 = torch.aten.item %2413 : !torch.vtensor<[],si8> -> !torch.int
    %2417 = torch_c.to_i64 %2416
    %cast_2437 = tensor.cast %cast_2436 : tensor<128x512x1x1xi8> to tensor<128x512x1x1xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2438 = arith.constant 1 : index
    %c0_2439 = arith.constant 0 : index
    %c128_2440 = arith.constant 128 : index
    %c1_2441 = arith.constant 1 : index
    %c512_2442 = arith.constant 512 : index
    %2418 = tensor.empty() : tensor<128x512x1x1xf32>
    %2419 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2437 : tensor<128x512x1x1xi8>) outs(%2418 : tensor<128x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2416
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2414
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x512x1x1xf32>
    // %cast_2443 is the filter operand of the conv_2d_nchw_fchw op producing %2480.
    %cast_2443 = tensor.cast %2419 : tensor<128x512x1x1xf32> to tensor<128x512x1x1xf32>
    // Quantize the f32 vector %88 (128 elements) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (%2420) and zero_point = 0 (%2421).
    // After dequantization this vector initializes the accumulator of the
    // convolution that produces %2480 (i.e. it acts as the bias).
    %2420 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2421 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2444 = torch.constant.int 12
    %2422 = torch.aten.item %2420 : !torch.vtensor<[],f32> -> !torch.float
    %2423 = torch_c.to_f64 %2422
    %2424 = torch.aten.item %2421 : !torch.vtensor<[],si8> -> !torch.int
    %2425 = torch_c.to_i64 %2424
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2445 = arith.constant 1 : index
    %c0_2446 = arith.constant 0 : index
    %c128_2447 = arith.constant 128 : index
    %2426 = tensor.empty() : tensor<128xi8>
    %2427 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%88 : tensor<128xf32>) outs(%2426 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2424
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2422
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Both casts keep the same static type.
    %cast_2448 = tensor.cast %2427 : tensor<128xi8> to tensor<128xi8>
    %cast_2449 = tensor.cast %cast_2448 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 vector %cast_2449 (128 elements) back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 3.90625e-03 (%2428) and zero_point = 0 (%2429).
    %2428 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2429 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2430 = torch.aten.item %2428 : !torch.vtensor<[],f32> -> !torch.float
    %2431 = torch_c.to_f64 %2430
    %2432 = torch.aten.item %2429 : !torch.vtensor<[],si8> -> !torch.int
    %2433 = torch_c.to_i64 %2432
    %cast_2450 = tensor.cast %cast_2449 : tensor<128xi8> to tensor<128xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2451 = arith.constant 1 : index
    %c0_2452 = arith.constant 0 : index
    %c128_2453 = arith.constant 128 : index
    %2434 = tensor.empty() : tensor<128xf32>
    %2435 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2450 : tensor<128xi8>) outs(%2434 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2432
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2430
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    // %cast_2454 is broadcast into the accumulator of the convolution producing %2480.
    %cast_2454 = tensor.cast %2435 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2428 (1x512x28x28)
    // with the 128x512x1x1 filter %cast_2443, accumulating onto the 128-element
    // vector %cast_2454 broadcast along the channel dimension. The result is
    // cast to the static shape 1x128x28x28.
    // NOTE(review): the four lists below are presumably padding [0,0],
    // strides [1,1], dilations [1,1] and output padding [0,0] — roles inferred
    // from position and from how %2441/%2442 are used as pad amounts; confirm.
    %int0_2455 = torch.constant.int 0
    %int0_2456 = torch.constant.int 0
    %int1_2457 = torch.constant.int 1
    %int1_2458 = torch.constant.int 1
    %int1_2459 = torch.constant.int 1
    %int1_2460 = torch.constant.int 1
    %int0_2461 = torch.constant.int 0
    %2436 = torch.prim.ListConstruct %int0_2455, %int0_2456 : (!torch.int, !torch.int) -> !torch.list<int>
    %2437 = torch.prim.ListConstruct %int1_2457, %int1_2458 : (!torch.int, !torch.int) -> !torch.list<int>
    %2438 = torch.prim.ListConstruct %int1_2459, %int1_2460 : (!torch.int, !torch.int) -> !torch.list<int>
    %2439 = torch.prim.ListConstruct %int0_2461, %int0_2461 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2462 = torch.constant.bool false
    // %2440 (= 1) is the group count checked by the asserts below.
    %int1_2463 = torch.constant.int 1
    %2440 = torch_c.to_i64 %int1_2463
    %2441 = torch_c.to_i64 %int0_2455
    %2442 = torch_c.to_i64 %int0_2456
    %2443 = torch_c.to_i64 %int0_2461
    %2444 = torch_c.to_i64 %int0_2461
    %c0_2464 = arith.constant 0 : index
    %c1_2465 = arith.constant 1 : index
    %c1_2466 = arith.constant 1 : index
    %c512_2467 = arith.constant 512 : index
    %c2_2468 = arith.constant 2 : index
    %c28_2469 = arith.constant 28 : index
    %c3_2470 = arith.constant 3 : index
    %c28_2471 = arith.constant 28 : index
    %c0_2472 = arith.constant 0 : index
    %c128_2473 = arith.constant 128 : index
    %c1_2474 = arith.constant 1 : index
    %c512_2475 = arith.constant 512 : index
    %c2_2476 = arith.constant 2 : index
    %c1_2477 = arith.constant 1 : index
    %c3_2478 = arith.constant 3 : index
    %c1_2479 = arith.constant 1 : index
    // Group checks: 512 rem groups == 0 and 128 rem groups == 0.
    %2445 = arith.index_cast %2440 : i64 to index
    %c0_2480 = arith.constant 0 : index
    %2446 = arith.remsi %c512_2467, %2445 : index
    %2447 = arith.cmpi eq, %c0_2480, %2446 : index
    cf.assert %2447, "invalid: groups must divide input channel size evenly."
    %c0_2481 = arith.constant 0 : index
    %2448 = arith.remsi %c128_2473, %2445 : index
    %2449 = arith.cmpi eq, %c0_2481, %2448 : index
    cf.assert %2449, "invalid: groups must divide weight batch size evenly."
    %c1_i64_2482 = arith.constant 1 : i64
    %c1_i64_2483 = arith.constant 1 : i64
    %c1_i64_2484 = arith.constant 1 : i64
    %c1_i64_2485 = arith.constant 1 : i64
    %cst_2486 = arith.constant 0.000000e+00 : f32
    %c0_2487 = arith.constant 0 : index
    %c1_2488 = arith.constant 1 : index
    %c1_2489 = arith.constant 1 : index
    %c512_2490 = arith.constant 512 : index
    %c2_2491 = arith.constant 2 : index
    %c28_2492 = arith.constant 28 : index
    %c3_2493 = arith.constant 3 : index
    %c28_2494 = arith.constant 28 : index
    // Pad the input with 0.0. All four pad amounts are 0 here (dims 0 and 1
    // from %c0_i64_2495, dims 2 and 3 from %2441/%2442), but the result type
    // is fully dynamic.
    %c0_i64_2495 = arith.constant 0 : i64
    %2450 = arith.index_cast %c0_i64_2495 : i64 to index
    %2451 = arith.index_cast %c0_i64_2495 : i64 to index
    %2452 = arith.index_cast %2441 : i64 to index
    %2453 = arith.index_cast %2442 : i64 to index
    %padded_2496 = tensor.pad %cast_2428 low[%2450, %2451, %2452, %2453] high[%2450, %2451, %2452, %2453] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2486 : f32
    } : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
    // First dynamic output extent:
    //   floordiv(28 + 2*%2441 - %c1_i64_2482*(1 - 1) - 1, %c1_i64_2484) + 1
    // NOTE(review): this has the shape of the usual conv output-size formula
    // (in + 2*pad - dilation*(kernel-1) - 1) floordiv stride + 1 — operand
    // roles inferred, confirm against the lowering.
    %2454 = arith.index_cast %c1_2477 : index to i64
    %c1_i64_2497 = arith.constant 1 : i64
    %c2_i64_2498 = arith.constant 2 : i64
    %2455 = arith.muli %2441, %c2_i64_2498 : i64
    %2456 = arith.index_cast %c28_2469 : index to i64
    %2457 = arith.addi %2456, %2455 : i64
    %2458 = arith.subi %2454, %c1_i64_2497 : i64
    %2459 = arith.muli %c1_i64_2482, %2458 : i64
    %2460 = arith.subi %2457, %2459 : i64
    %2461 = arith.subi %2460, %c1_i64_2497 : i64
    %2462 = arith.floordivsi %2461, %c1_i64_2484 : i64
    %2463 = arith.addi %2462, %c1_i64_2497 : i64
    %2464 = arith.index_cast %2463 : i64 to index
    // Second dynamic output extent, same computation using %2442,
    // %c1_i64_2483 and %c1_i64_2485.
    %2465 = arith.index_cast %c1_2479 : index to i64
    %c1_i64_2499 = arith.constant 1 : i64
    %c2_i64_2500 = arith.constant 2 : i64
    %2466 = arith.muli %2442, %c2_i64_2500 : i64
    %2467 = arith.index_cast %c28_2471 : index to i64
    %2468 = arith.addi %2467, %2466 : i64
    %2469 = arith.subi %2465, %c1_i64_2499 : i64
    %2470 = arith.muli %c1_i64_2483, %2469 : i64
    %2471 = arith.subi %2468, %2470 : i64
    %2472 = arith.subi %2471, %c1_i64_2499 : i64
    %2473 = arith.floordivsi %2472, %c1_i64_2485 : i64
    %2474 = arith.addi %2473, %c1_i64_2499 : i64
    %2475 = arith.index_cast %2474 : i64 to index
    // Fill the output tensor with %cast_2454 indexed by d1 (#map3), so the
    // convolution below accumulates on top of it.
    %2476 = tensor.empty(%2464, %2475) : tensor<1x128x?x?xf32>
    %2477 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2454 : tensor<128xf32>) outs(%2476 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // NOTE(review): %2478 and %2479 appear unused by this stage — confirm before removing.
    %2478 = arith.floordivsi %c512_2467, %2445 : index
    %2479 = arith.floordivsi %c128_2473, %2445 : index
    %c0_2501 = arith.constant 0 : index
    %c1_2502 = arith.constant 1 : index
    %2480 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2496, %cast_2443 : tensor<?x?x?x?xf32>, tensor<128x512x1x1xf32>) outs(%2477 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Re-establish the static spatial extents (28x28) on the result.
    %cast_2503 = tensor.cast %2480 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // ReLU on the convolution result %cast_2503: each element becomes itself
    // if it compares greater than 0.0, otherwise 0.0.
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2504 = arith.constant 1 : index
    %c1_2505 = arith.constant 1 : index
    %c128_2506 = arith.constant 128 : index
    %c2_2507 = arith.constant 2 : index
    %c28_2508 = arith.constant 28 : index
    %c3_2509 = arith.constant 3 : index
    %c28_2510 = arith.constant 28 : index
    %2481 = tensor.empty() : tensor<1x128x28x28xf32>
    %2482 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2503 : tensor<1x128x28x28xf32>) outs(%2481 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate: it is also true when %in is NaN,
      // so a NaN input is passed through rather than replaced by 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_2511 = tensor.cast %2482 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 ReLU output %cast_2511 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (%2483) and zero_point = 0 (%2484).
    %2483 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2484 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2512 = torch.constant.int 12
    %2485 = torch.aten.item %2483 : !torch.vtensor<[],f32> -> !torch.float
    %2486 = torch_c.to_f64 %2485
    %2487 = torch.aten.item %2484 : !torch.vtensor<[],si8> -> !torch.int
    %2488 = torch_c.to_i64 %2487
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2513 = arith.constant 1 : index
    %c1_2514 = arith.constant 1 : index
    %c128_2515 = arith.constant 128 : index
    %c2_2516 = arith.constant 2 : index
    %c28_2517 = arith.constant 28 : index
    %c3_2518 = arith.constant 3 : index
    %c28_2519 = arith.constant 28 : index
    %2489 = tensor.empty() : tensor<1x128x28x28xi8>
    %2490 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2511 : tensor<1x128x28x28xf32>) outs(%2489 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2487
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2485
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // Both casts keep the same static type.
    %cast_2520 = tensor.cast %2490 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_2521 = tensor.cast %cast_2520 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize the i8 tensor %cast_2521 back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 3.90625e-03 (%2491) and zero_point = 0 (%2492).
    %2491 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2492 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2493 = torch.aten.item %2491 : !torch.vtensor<[],f32> -> !torch.float
    %2494 = torch_c.to_f64 %2493
    %2495 = torch.aten.item %2492 : !torch.vtensor<[],si8> -> !torch.int
    %2496 = torch_c.to_i64 %2495
    %cast_2522 = tensor.cast %cast_2521 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2523 = arith.constant 1 : index
    %c1_2524 = arith.constant 1 : index
    %c128_2525 = arith.constant 128 : index
    %c2_2526 = arith.constant 2 : index
    %c28_2527 = arith.constant 28 : index
    %c3_2528 = arith.constant 3 : index
    %c28_2529 = arith.constant 28 : index
    %2497 = tensor.empty() : tensor<1x128x28x28xf32>
    %2498 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2522 : tensor<1x128x28x28xi8>) outs(%2497 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2495
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2493
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_2530 = tensor.cast %2498 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the f32 tensor %90 (128x128x3x3) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (%2499) and zero_point = 0 (%2500).
    // NOTE(review): presumably the filter of a following 3x3 convolution — confirm.
    %2499 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2500 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2531 = torch.constant.int 12
    %2501 = torch.aten.item %2499 : !torch.vtensor<[],f32> -> !torch.float
    %2502 = torch_c.to_f64 %2501
    %2503 = torch.aten.item %2500 : !torch.vtensor<[],si8> -> !torch.int
    %2504 = torch_c.to_i64 %2503
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2532 = arith.constant 1 : index
    %c0_2533 = arith.constant 0 : index
    %c128_2534 = arith.constant 128 : index
    %c1_2535 = arith.constant 1 : index
    %c128_2536 = arith.constant 128 : index
    %c2_2537 = arith.constant 2 : index
    %c3_2538 = arith.constant 3 : index
    %c3_2539 = arith.constant 3 : index
    %c3_2540 = arith.constant 3 : index
    %2505 = tensor.empty() : tensor<128x128x3x3xi8>
    %2506 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%90 : tensor<128x128x3x3xf32>) outs(%2505 : tensor<128x128x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2503
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2501
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128x128x3x3xi8>
    // Both casts keep the same static type.
    %cast_2541 = tensor.cast %2506 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    %cast_2542 = tensor.cast %cast_2541 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // Dequantize the i8 tensor %cast_2542 (128x128x3x3) back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 3.90625e-03 (%2507) and zero_point = 0 (%2508).
    %2507 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2508 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2509 = torch.aten.item %2507 : !torch.vtensor<[],f32> -> !torch.float
    %2510 = torch_c.to_f64 %2509
    %2511 = torch.aten.item %2508 : !torch.vtensor<[],si8> -> !torch.int
    %2512 = torch_c.to_i64 %2511
    %cast_2543 = tensor.cast %cast_2542 : tensor<128x128x3x3xi8> to tensor<128x128x3x3xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2544 = arith.constant 1 : index
    %c0_2545 = arith.constant 0 : index
    %c128_2546 = arith.constant 128 : index
    %c1_2547 = arith.constant 1 : index
    %c128_2548 = arith.constant 128 : index
    %c2_2549 = arith.constant 2 : index
    %c3_2550 = arith.constant 3 : index
    %c3_2551 = arith.constant 3 : index
    %c3_2552 = arith.constant 3 : index
    %2513 = tensor.empty() : tensor<128x128x3x3xf32>
    %2514 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2543 : tensor<128x128x3x3xi8>) outs(%2513 : tensor<128x128x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2511
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2509
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128x128x3x3xf32>
    %cast_2553 = tensor.cast %2514 : tensor<128x128x3x3xf32> to tensor<128x128x3x3xf32>
    // Quantize the f32 vector %92 (128 elements) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (%2515) and zero_point = 0 (%2516).
    // NOTE(review): presumably the bias of a following convolution — confirm.
    %2515 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2516 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2554 = torch.constant.int 12
    %2517 = torch.aten.item %2515 : !torch.vtensor<[],f32> -> !torch.float
    %2518 = torch_c.to_f64 %2517
    %2519 = torch.aten.item %2516 : !torch.vtensor<[],si8> -> !torch.int
    %2520 = torch_c.to_i64 %2519
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2555 = arith.constant 1 : index
    %c0_2556 = arith.constant 0 : index
    %c128_2557 = arith.constant 128 : index
    %2521 = tensor.empty() : tensor<128xi8>
    %2522 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%92 : tensor<128xf32>) outs(%2521 : tensor<128xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are re-read from the torch scalars and narrowed to f32.
      %5774 = torch_c.to_i64 %2519
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2517
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and differ by one step on exact .5 values.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<128xi8>
    // Both casts keep the same static type.
    %cast_2558 = tensor.cast %2522 : tensor<128xi8> to tensor<128xi8>
    %cast_2559 = tensor.cast %cast_2558 : tensor<128xi8> to tensor<128xi8>
    // Dequantize the i8 vector %cast_2559 (128 elements) back to f32:
    //   x = (q - zero_point) * scale
    // with scale = 7.8125e-03 (%2523) and zero_point = 0 (%2524).
    %2523 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2524 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2525 = torch.aten.item %2523 : !torch.vtensor<[],f32> -> !torch.float
    %2526 = torch_c.to_f64 %2525
    %2527 = torch.aten.item %2524 : !torch.vtensor<[],si8> -> !torch.int
    %2528 = torch_c.to_i64 %2527
    %cast_2560 = tensor.cast %cast_2559 : tensor<128xi8> to tensor<128xi8>
    // NOTE(review): the index constants below appear unused by this stage — confirm before removing.
    %c1_2561 = arith.constant 1 : index
    %c0_2562 = arith.constant 0 : index
    %c128_2563 = arith.constant 128 : index
    %2529 = tensor.empty() : tensor<128xf32>
    %2530 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2560 : tensor<128xi8>) outs(%2529 : tensor<128xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction is done in 32 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2527
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is read from the torch scalar and narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %2525
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<128xf32>
    %cast_2564 = tensor.cast %2530 : tensor<128xf32> to tensor<128xf32>
    // 2-D convolution in NCHW/FCHW layout: input %cast_2530 (1x128x28x28) and
    // weights %cast_2553 (128x128x3x3) are defined earlier in the file; the
    // per-channel f32 tensor %cast_2564 initializes the output. Spatial padding
    // is 1 on each side; the conv op uses strides = 1 and dilations = 1.
    %int1_2565 = torch.constant.int 1
    %int1_2566 = torch.constant.int 1
    %int1_2567 = torch.constant.int 1
    %int1_2568 = torch.constant.int 1
    %int1_2569 = torch.constant.int 1
    %int1_2570 = torch.constant.int 1
    %int0_2571 = torch.constant.int 0
    // NOTE(review): these lists are not consumed by any op visible here;
    // presumably the padding / stride / dilation / output-padding operands of
    // the original torch-level convolution — confirm against the torch IR.
    %2531 = torch.prim.ListConstruct %int1_2565, %int1_2566 : (!torch.int, !torch.int) -> !torch.list<int>
    %2532 = torch.prim.ListConstruct %int1_2567, %int1_2568 : (!torch.int, !torch.int) -> !torch.list<int>
    %2533 = torch.prim.ListConstruct %int1_2569, %int1_2570 : (!torch.int, !torch.int) -> !torch.list<int>
    %2534 = torch.prim.ListConstruct %int0_2571, %int0_2571 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2572 = torch.constant.bool false
    %int1_2573 = torch.constant.int 1
    // %2535 feeds the "groups" divisibility checks below; %2536 / %2537 are the
    // padding amounts applied to dims 2 and 3 by tensor.pad.
    %2535 = torch_c.to_i64 %int1_2573
    %2536 = torch_c.to_i64 %int1_2565
    %2537 = torch_c.to_i64 %int1_2566
    %2538 = torch_c.to_i64 %int0_2571
    %2539 = torch_c.to_i64 %int0_2571
    // Index constants emitted by the lowering; only some are used below.
    %c0_2574 = arith.constant 0 : index
    %c1_2575 = arith.constant 1 : index
    %c1_2576 = arith.constant 1 : index
    %c128_2577 = arith.constant 128 : index
    %c2_2578 = arith.constant 2 : index
    %c28_2579 = arith.constant 28 : index
    %c3_2580 = arith.constant 3 : index
    %c28_2581 = arith.constant 28 : index
    %c0_2582 = arith.constant 0 : index
    %c128_2583 = arith.constant 128 : index
    %c1_2584 = arith.constant 1 : index
    %c128_2585 = arith.constant 128 : index
    %c2_2586 = arith.constant 2 : index
    %c3_2587 = arith.constant 3 : index
    %c3_2588 = arith.constant 3 : index
    %c3_2589 = arith.constant 3 : index
    // Runtime checks: 128 rem groups must be 0 (groups = 1 here).
    %2540 = arith.index_cast %2535 : i64 to index
    %c0_2590 = arith.constant 0 : index
    %2541 = arith.remsi %c128_2577, %2540 : index
    %2542 = arith.cmpi eq, %c0_2590, %2541 : index
    cf.assert %2542, "invalid: groups must divide input channel size evenly."
    %c0_2591 = arith.constant 0 : index
    %2543 = arith.remsi %c128_2583, %2540 : index
    %2544 = arith.cmpi eq, %c0_2591, %2543 : index
    cf.assert %2544, "invalid: groups must divide weight batch size evenly."
    // The first and second pair of i64 ones below are used as the multiplier of
    // (kernel - 1) and as the divisor in the output-size arithmetic, respectively.
    %c1_i64_2592 = arith.constant 1 : i64
    %c1_i64_2593 = arith.constant 1 : i64
    %c1_i64_2594 = arith.constant 1 : i64
    %c1_i64_2595 = arith.constant 1 : i64
    // Padding value yielded by tensor.pad.
    %cst_2596 = arith.constant 0.000000e+00 : f32
    %c0_2597 = arith.constant 0 : index
    %c1_2598 = arith.constant 1 : index
    %c1_2599 = arith.constant 1 : index
    %c128_2600 = arith.constant 128 : index
    %c2_2601 = arith.constant 2 : index
    %c28_2602 = arith.constant 28 : index
    %c3_2603 = arith.constant 3 : index
    %c28_2604 = arith.constant 28 : index
    %c0_i64_2605 = arith.constant 0 : i64
    // Pad amounts per dimension: 0, 0, %2536, %2537 (same for low and high).
    %2545 = arith.index_cast %c0_i64_2605 : i64 to index
    %2546 = arith.index_cast %c0_i64_2605 : i64 to index
    %2547 = arith.index_cast %2536 : i64 to index
    %2548 = arith.index_cast %2537 : i64 to index
    %padded_2606 = tensor.pad %cast_2530 low[%2545, %2546, %2547, %2548] high[%2545, %2546, %2547, %2548] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2596 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(28 + 2*pad - 1*(3 - 1) - 1, 1) + 1, which evaluates to 28 here.
    %2549 = arith.index_cast %c3_2587 : index to i64
    %c1_i64_2607 = arith.constant 1 : i64
    %c2_i64_2608 = arith.constant 2 : i64
    %2550 = arith.muli %2536, %c2_i64_2608 : i64
    %2551 = arith.index_cast %c28_2579 : index to i64
    %2552 = arith.addi %2551, %2550 : i64
    %2553 = arith.subi %2549, %c1_i64_2607 : i64
    %2554 = arith.muli %c1_i64_2592, %2553 : i64
    %2555 = arith.subi %2552, %2554 : i64
    %2556 = arith.subi %2555, %c1_i64_2607 : i64
    %2557 = arith.floordivsi %2556, %c1_i64_2594 : i64
    %2558 = arith.addi %2557, %c1_i64_2607 : i64
    %2559 = arith.index_cast %2558 : i64 to index
    // Same computation for dim 3.
    %2560 = arith.index_cast %c3_2589 : index to i64
    %c1_i64_2609 = arith.constant 1 : i64
    %c2_i64_2610 = arith.constant 2 : i64
    %2561 = arith.muli %2537, %c2_i64_2610 : i64
    %2562 = arith.index_cast %c28_2581 : index to i64
    %2563 = arith.addi %2562, %2561 : i64
    %2564 = arith.subi %2560, %c1_i64_2609 : i64
    %2565 = arith.muli %c1_i64_2593, %2564 : i64
    %2566 = arith.subi %2563, %2565 : i64
    %2567 = arith.subi %2566, %c1_i64_2609 : i64
    %2568 = arith.floordivsi %2567, %c1_i64_2595 : i64
    %2569 = arith.addi %2568, %c1_i64_2609 : i64
    %2570 = arith.index_cast %2569 : i64 to index
    // Broadcast the 128-element tensor along the channel dim (#map3 selects d1)
    // into the output buffer; the convolution then accumulates onto it.
    %2571 = tensor.empty(%2559, %2570) : tensor<1x128x?x?xf32>
    %2572 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2564 : tensor<128xf32>) outs(%2571 : tensor<1x128x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x128x?x?xf32>
    // NOTE(review): %2573 / %2574 are not referenced by any op visible here.
    %2573 = arith.floordivsi %c128_2577, %2540 : index
    %2574 = arith.floordivsi %c128_2583, %2540 : index
    %c0_2611 = arith.constant 0 : index
    %c1_2612 = arith.constant 1 : index
    %2575 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2606, %cast_2553 : tensor<?x?x?x?xf32>, tensor<128x128x3x3xf32>) outs(%2572 : tensor<1x128x?x?xf32>) -> tensor<1x128x?x?xf32>
    // Refine the dynamic spatial dims to the static 28x28 shape.
    %cast_2613 = tensor.cast %2575 : tensor<1x128x?x?xf32> to tensor<1x128x28x28xf32>
    // Elementwise ReLU on %cast_2613: out = (in > 0) ? in : 0.
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2614 = arith.constant 1 : index
    %c1_2615 = arith.constant 1 : index
    %c128_2616 = arith.constant 128 : index
    %c2_2617 = arith.constant 2 : index
    %c28_2618 = arith.constant 28 : index
    %c3_2619 = arith.constant 3 : index
    %c28_2620 = arith.constant 28 : index
    %2576 = tensor.empty() : tensor<1x128x28x28xf32>
    %2577 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2613 : tensor<1x128x28x28xf32>) outs(%2576 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered-or-greater-than predicate, so the comparison is
      // also true when %in is NaN and the select then yields %in unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_2621 = tensor.cast %2577 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize %cast_2621 (1x128x28x28 f32) to i8 and immediately dequantize it
    // back to f32, both with scale = 1.5625e-02 and zero_point = 0.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    %2578 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2579 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2622 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2622 = torch.constant.int 12
    %2580 = torch.aten.item %2578 : !torch.vtensor<[],f32> -> !torch.float
    %2581 = torch_c.to_f64 %2580
    %2582 = torch.aten.item %2579 : !torch.vtensor<[],si8> -> !torch.int
    %2583 = torch_c.to_i64 %2582
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2623 = arith.constant 1 : index
    %c1_2624 = arith.constant 1 : index
    %c128_2625 = arith.constant 128 : index
    %c2_2626 = arith.constant 2 : index
    %c28_2627 = arith.constant 28 : index
    %c3_2628 = arith.constant 3 : index
    %c28_2629 = arith.constant 28 : index
    %2584 = tensor.empty() : tensor<1x128x28x28xi8>
    %2585 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2621 : tensor<1x128x28x28xf32>) outs(%2584 : tensor<1x128x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2582
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2580
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x128x28x28xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2630 = tensor.cast %2585 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %cast_2631 = tensor.cast %cast_2630 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2586 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2587 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2588 = torch.aten.item %2586 : !torch.vtensor<[],f32> -> !torch.float
    %2589 = torch_c.to_f64 %2588
    %2590 = torch.aten.item %2587 : !torch.vtensor<[],si8> -> !torch.int
    %2591 = torch_c.to_i64 %2590
    %cast_2632 = tensor.cast %cast_2631 : tensor<1x128x28x28xi8> to tensor<1x128x28x28xi8>
    %c1_2633 = arith.constant 1 : index
    %c1_2634 = arith.constant 1 : index
    %c128_2635 = arith.constant 128 : index
    %c2_2636 = arith.constant 2 : index
    %c28_2637 = arith.constant 28 : index
    %c3_2638 = arith.constant 3 : index
    %c28_2639 = arith.constant 28 : index
    %2592 = tensor.empty() : tensor<1x128x28x28xf32>
    %2593 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2632 : tensor<1x128x28x28xi8>) outs(%2592 : tensor<1x128x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2590
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2588
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x128x28x28xf32>
    %cast_2640 = tensor.cast %2593 : tensor<1x128x28x28xf32> to tensor<1x128x28x28xf32>
    // Quantize the 512x128x1x1 f32 tensor %94 (defined earlier in the file) to i8
    // and dequantize it back to f32, both with scale = 3.90625e-03 and
    // zero_point = 0. The result %cast_2655 is the filter operand of the
    // convolution further down.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    %2594 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2595 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2641 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2641 = torch.constant.int 12
    %2596 = torch.aten.item %2594 : !torch.vtensor<[],f32> -> !torch.float
    %2597 = torch_c.to_f64 %2596
    %2598 = torch.aten.item %2595 : !torch.vtensor<[],si8> -> !torch.int
    %2599 = torch_c.to_i64 %2598
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2642 = arith.constant 1 : index
    %c0_2643 = arith.constant 0 : index
    %c512_2644 = arith.constant 512 : index
    %c1_2645 = arith.constant 1 : index
    %c128_2646 = arith.constant 128 : index
    %2600 = tensor.empty() : tensor<512x128x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0); the two trailing dims have size 1.
    %2601 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%94 : tensor<512x128x1x1xf32>) outs(%2600 : tensor<512x128x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2598
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2596
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x128x1x1xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2647 = tensor.cast %2601 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %cast_2648 = tensor.cast %cast_2647 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2602 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2603 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2604 = torch.aten.item %2602 : !torch.vtensor<[],f32> -> !torch.float
    %2605 = torch_c.to_f64 %2604
    %2606 = torch.aten.item %2603 : !torch.vtensor<[],si8> -> !torch.int
    %2607 = torch_c.to_i64 %2606
    %cast_2649 = tensor.cast %cast_2648 : tensor<512x128x1x1xi8> to tensor<512x128x1x1xi8>
    %c1_2650 = arith.constant 1 : index
    %c0_2651 = arith.constant 0 : index
    %c512_2652 = arith.constant 512 : index
    %c1_2653 = arith.constant 1 : index
    %c128_2654 = arith.constant 128 : index
    %2608 = tensor.empty() : tensor<512x128x1x1xf32>
    %2609 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2649 : tensor<512x128x1x1xi8>) outs(%2608 : tensor<512x128x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2606
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2604
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x128x1x1xf32>
    %cast_2655 = tensor.cast %2609 : tensor<512x128x1x1xf32> to tensor<512x128x1x1xf32>
    // Quantize the 512-element f32 tensor %96 (defined earlier in the file) to i8
    // and dequantize it back to f32, both with scale = 7.8125e-03 and
    // zero_point = 0. The result %cast_2666 is broadcast along the channel dim
    // to initialize the output of the convolution further down.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    %2610 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2611 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2656 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2656 = torch.constant.int 12
    %2612 = torch.aten.item %2610 : !torch.vtensor<[],f32> -> !torch.float
    %2613 = torch_c.to_f64 %2612
    %2614 = torch.aten.item %2611 : !torch.vtensor<[],si8> -> !torch.int
    %2615 = torch_c.to_i64 %2614
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2657 = arith.constant 1 : index
    %c0_2658 = arith.constant 0 : index
    %c512_2659 = arith.constant 512 : index
    %2616 = tensor.empty() : tensor<512xi8>
    %2617 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%96 : tensor<512xf32>) outs(%2616 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2614
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2612
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2660 = tensor.cast %2617 : tensor<512xi8> to tensor<512xi8>
    %cast_2661 = tensor.cast %cast_2660 : tensor<512xi8> to tensor<512xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2618 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2619 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2620 = torch.aten.item %2618 : !torch.vtensor<[],f32> -> !torch.float
    %2621 = torch_c.to_f64 %2620
    %2622 = torch.aten.item %2619 : !torch.vtensor<[],si8> -> !torch.int
    %2623 = torch_c.to_i64 %2622
    %cast_2662 = tensor.cast %cast_2661 : tensor<512xi8> to tensor<512xi8>
    %c1_2663 = arith.constant 1 : index
    %c0_2664 = arith.constant 0 : index
    %c512_2665 = arith.constant 512 : index
    %2624 = tensor.empty() : tensor<512xf32>
    %2625 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2662 : tensor<512xi8>) outs(%2624 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2622
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2620
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    %cast_2666 = tensor.cast %2625 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution in NCHW/FCHW layout: input %cast_2640 (1x128x28x28),
    // weights %cast_2655 (512x128x1x1); the per-channel f32 tensor %cast_2666
    // initializes the output. Spatial padding is 0; the conv op uses
    // strides = 1 and dilations = 1. Result is 1x512x28x28.
    %int0_2667 = torch.constant.int 0
    %int0_2668 = torch.constant.int 0
    %int1_2669 = torch.constant.int 1
    %int1_2670 = torch.constant.int 1
    %int1_2671 = torch.constant.int 1
    %int1_2672 = torch.constant.int 1
    %int0_2673 = torch.constant.int 0
    // NOTE(review): these lists are not consumed by any op visible here;
    // presumably the padding / stride / dilation / output-padding operands of
    // the original torch-level convolution — confirm against the torch IR.
    %2626 = torch.prim.ListConstruct %int0_2667, %int0_2668 : (!torch.int, !torch.int) -> !torch.list<int>
    %2627 = torch.prim.ListConstruct %int1_2669, %int1_2670 : (!torch.int, !torch.int) -> !torch.list<int>
    %2628 = torch.prim.ListConstruct %int1_2671, %int1_2672 : (!torch.int, !torch.int) -> !torch.list<int>
    %2629 = torch.prim.ListConstruct %int0_2673, %int0_2673 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2674 = torch.constant.bool false
    %int1_2675 = torch.constant.int 1
    // %2630 feeds the "groups" divisibility checks below; %2631 / %2632 are the
    // padding amounts applied to dims 2 and 3 by tensor.pad (both 0 here).
    %2630 = torch_c.to_i64 %int1_2675
    %2631 = torch_c.to_i64 %int0_2667
    %2632 = torch_c.to_i64 %int0_2668
    %2633 = torch_c.to_i64 %int0_2673
    %2634 = torch_c.to_i64 %int0_2673
    // Index constants emitted by the lowering; only some are used below.
    %c0_2676 = arith.constant 0 : index
    %c1_2677 = arith.constant 1 : index
    %c1_2678 = arith.constant 1 : index
    %c128_2679 = arith.constant 128 : index
    %c2_2680 = arith.constant 2 : index
    %c28_2681 = arith.constant 28 : index
    %c3_2682 = arith.constant 3 : index
    %c28_2683 = arith.constant 28 : index
    %c0_2684 = arith.constant 0 : index
    %c512_2685 = arith.constant 512 : index
    %c1_2686 = arith.constant 1 : index
    %c128_2687 = arith.constant 128 : index
    %c2_2688 = arith.constant 2 : index
    %c1_2689 = arith.constant 1 : index
    %c3_2690 = arith.constant 3 : index
    %c1_2691 = arith.constant 1 : index
    // Runtime checks: 128 rem groups and 512 rem groups must be 0 (groups = 1 here).
    %2635 = arith.index_cast %2630 : i64 to index
    %c0_2692 = arith.constant 0 : index
    %2636 = arith.remsi %c128_2679, %2635 : index
    %2637 = arith.cmpi eq, %c0_2692, %2636 : index
    cf.assert %2637, "invalid: groups must divide input channel size evenly."
    %c0_2693 = arith.constant 0 : index
    %2638 = arith.remsi %c512_2685, %2635 : index
    %2639 = arith.cmpi eq, %c0_2693, %2638 : index
    cf.assert %2639, "invalid: groups must divide weight batch size evenly."
    // The first and second pair of i64 ones below are used as the multiplier of
    // (kernel - 1) and as the divisor in the output-size arithmetic, respectively.
    %c1_i64_2694 = arith.constant 1 : i64
    %c1_i64_2695 = arith.constant 1 : i64
    %c1_i64_2696 = arith.constant 1 : i64
    %c1_i64_2697 = arith.constant 1 : i64
    // Padding value yielded by tensor.pad.
    %cst_2698 = arith.constant 0.000000e+00 : f32
    %c0_2699 = arith.constant 0 : index
    %c1_2700 = arith.constant 1 : index
    %c1_2701 = arith.constant 1 : index
    %c128_2702 = arith.constant 128 : index
    %c2_2703 = arith.constant 2 : index
    %c28_2704 = arith.constant 28 : index
    %c3_2705 = arith.constant 3 : index
    %c28_2706 = arith.constant 28 : index
    %c0_i64_2707 = arith.constant 0 : i64
    // Pad amounts per dimension: 0, 0, %2631, %2632 (same for low and high).
    %2640 = arith.index_cast %c0_i64_2707 : i64 to index
    %2641 = arith.index_cast %c0_i64_2707 : i64 to index
    %2642 = arith.index_cast %2631 : i64 to index
    %2643 = arith.index_cast %2632 : i64 to index
    %padded_2708 = tensor.pad %cast_2640 low[%2640, %2641, %2642, %2643] high[%2640, %2641, %2642, %2643] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2698 : f32
    } : tensor<1x128x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(28 + 2*pad - 1*(1 - 1) - 1, 1) + 1, which evaluates to 28 here.
    %2644 = arith.index_cast %c1_2689 : index to i64
    %c1_i64_2709 = arith.constant 1 : i64
    %c2_i64_2710 = arith.constant 2 : i64
    %2645 = arith.muli %2631, %c2_i64_2710 : i64
    %2646 = arith.index_cast %c28_2681 : index to i64
    %2647 = arith.addi %2646, %2645 : i64
    %2648 = arith.subi %2644, %c1_i64_2709 : i64
    %2649 = arith.muli %c1_i64_2694, %2648 : i64
    %2650 = arith.subi %2647, %2649 : i64
    %2651 = arith.subi %2650, %c1_i64_2709 : i64
    %2652 = arith.floordivsi %2651, %c1_i64_2696 : i64
    %2653 = arith.addi %2652, %c1_i64_2709 : i64
    %2654 = arith.index_cast %2653 : i64 to index
    // Same computation for dim 3.
    %2655 = arith.index_cast %c1_2691 : index to i64
    %c1_i64_2711 = arith.constant 1 : i64
    %c2_i64_2712 = arith.constant 2 : i64
    %2656 = arith.muli %2632, %c2_i64_2712 : i64
    %2657 = arith.index_cast %c28_2683 : index to i64
    %2658 = arith.addi %2657, %2656 : i64
    %2659 = arith.subi %2655, %c1_i64_2711 : i64
    %2660 = arith.muli %c1_i64_2695, %2659 : i64
    %2661 = arith.subi %2658, %2660 : i64
    %2662 = arith.subi %2661, %c1_i64_2711 : i64
    %2663 = arith.floordivsi %2662, %c1_i64_2697 : i64
    %2664 = arith.addi %2663, %c1_i64_2711 : i64
    %2665 = arith.index_cast %2664 : i64 to index
    // Broadcast the 512-element tensor along the channel dim (#map3 selects d1)
    // into the output buffer; the convolution then accumulates onto it.
    %2666 = tensor.empty(%2654, %2665) : tensor<1x512x?x?xf32>
    %2667 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2666 : tensor<512xf32>) outs(%2666 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // NOTE(review): %2668 / %2669 are not referenced by any op visible here.
    %2668 = arith.floordivsi %c128_2679, %2635 : index
    %2669 = arith.floordivsi %c512_2685, %2635 : index
    %c0_2713 = arith.constant 0 : index
    %c1_2714 = arith.constant 1 : index
    %2670 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2708, %cast_2655 : tensor<?x?x?x?xf32>, tensor<512x128x1x1xf32>) outs(%2667 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Refine the dynamic spatial dims to the static 28x28 shape.
    %cast_2715 = tensor.cast %2670 : tensor<1x512x?x?xf32> to tensor<1x512x28x28xf32>
    // Quantize %cast_2715 (1x512x28x28 f32) to i8 and immediately dequantize it
    // back to f32, both with scale = 7.8125e-03 and zero_point = 0.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    %2671 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2672 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2716 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2716 = torch.constant.int 12
    %2673 = torch.aten.item %2671 : !torch.vtensor<[],f32> -> !torch.float
    %2674 = torch_c.to_f64 %2673
    %2675 = torch.aten.item %2672 : !torch.vtensor<[],si8> -> !torch.int
    %2676 = torch_c.to_i64 %2675
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2717 = arith.constant 1 : index
    %c1_2718 = arith.constant 1 : index
    %c512_2719 = arith.constant 512 : index
    %c2_2720 = arith.constant 2 : index
    %c28_2721 = arith.constant 28 : index
    %c3_2722 = arith.constant 3 : index
    %c28_2723 = arith.constant 28 : index
    %2677 = tensor.empty() : tensor<1x512x28x28xi8>
    %2678 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2715 : tensor<1x512x28x28xf32>) outs(%2677 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2675
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2673
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2724 = tensor.cast %2678 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2725 = tensor.cast %cast_2724 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2679 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2680 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2681 = torch.aten.item %2679 : !torch.vtensor<[],f32> -> !torch.float
    %2682 = torch_c.to_f64 %2681
    %2683 = torch.aten.item %2680 : !torch.vtensor<[],si8> -> !torch.int
    %2684 = torch_c.to_i64 %2683
    %cast_2726 = tensor.cast %cast_2725 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_2727 = arith.constant 1 : index
    %c1_2728 = arith.constant 1 : index
    %c512_2729 = arith.constant 512 : index
    %c2_2730 = arith.constant 2 : index
    %c28_2731 = arith.constant 28 : index
    %c3_2732 = arith.constant 3 : index
    %c28_2733 = arith.constant 28 : index
    %2685 = tensor.empty() : tensor<1x512x28x28xf32>
    %2686 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2726 : tensor<1x512x28x28xi8>) outs(%2685 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2683
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2681
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2734 = tensor.cast %2686 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Elementwise add of two 1x512x28x28 f32 tensors:
    //   out = %cast_2734 + %cast_2428 * float(%2687), where %2687 is the constant 1.
    // %cast_2428 is defined earlier in the file.
    %int1_2735 = torch.constant.int 1
    %2687 = torch_c.to_i64 %int1_2735
    // Index constants emitted by the lowering; some feed the size checks below.
    %c1_2736 = arith.constant 1 : index
    %c1_2737 = arith.constant 1 : index
    %c512_2738 = arith.constant 512 : index
    %c2_2739 = arith.constant 2 : index
    %c28_2740 = arith.constant 28 : index
    %c3_2741 = arith.constant 3 : index
    %c28_2742 = arith.constant 28 : index
    %c1_2743 = arith.constant 1 : index
    %c512_2744 = arith.constant 512 : index
    // Broadcast-compatibility checks. Both operands of each comparison are
    // constants with equal values (512 == 512, 28 == 28, 28 == 28).
    %2688 = arith.cmpi eq, %c512_2738, %c512_2744 : index
    cf.assert %2688, "mismatched size for broadcast"
    %c2_2745 = arith.constant 2 : index
    %c28_2746 = arith.constant 28 : index
    %2689 = arith.cmpi eq, %c28_2740, %c28_2746 : index
    cf.assert %2689, "mismatched size for broadcast"
    %c3_2747 = arith.constant 3 : index
    %c28_2748 = arith.constant 28 : index
    %2690 = arith.cmpi eq, %c28_2742, %c28_2748 : index
    cf.assert %2690, "mismatched size for broadcast"
    %2691 = tensor.empty() : tensor<1x512x28x28xf32>
    %2692 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2734, %cast_2428 : tensor<1x512x28x28xf32>, tensor<1x512x28x28xf32>) outs(%2691 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Multiplier for the second operand, converted from the i64 constant.
      %5774 = arith.sitofp %2687 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2749 = tensor.cast %2692 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Elementwise ReLU on %cast_2749: out = (in > 0) ? in : 0.
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2750 = arith.constant 1 : index
    %c1_2751 = arith.constant 1 : index
    %c512_2752 = arith.constant 512 : index
    %c2_2753 = arith.constant 2 : index
    %c28_2754 = arith.constant 28 : index
    %c3_2755 = arith.constant 3 : index
    %c28_2756 = arith.constant 28 : index
    %2693 = tensor.empty() : tensor<1x512x28x28xf32>
    %2694 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2749 : tensor<1x512x28x28xf32>) outs(%2693 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered-or-greater-than predicate, so the comparison is
      // also true when %in is NaN and the select then yields %in unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2757 = tensor.cast %2694 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize %cast_2757 (1x512x28x28 f32) to i8 and immediately dequantize it
    // back to f32, both with scale = 1.5625e-02 and zero_point = 0.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    %2695 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2696 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2758 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2758 = torch.constant.int 12
    %2697 = torch.aten.item %2695 : !torch.vtensor<[],f32> -> !torch.float
    %2698 = torch_c.to_f64 %2697
    %2699 = torch.aten.item %2696 : !torch.vtensor<[],si8> -> !torch.int
    %2700 = torch_c.to_i64 %2699
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2759 = arith.constant 1 : index
    %c1_2760 = arith.constant 1 : index
    %c512_2761 = arith.constant 512 : index
    %c2_2762 = arith.constant 2 : index
    %c28_2763 = arith.constant 28 : index
    %c3_2764 = arith.constant 3 : index
    %c28_2765 = arith.constant 28 : index
    %2701 = tensor.empty() : tensor<1x512x28x28xi8>
    %2702 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2757 : tensor<1x512x28x28xf32>) outs(%2701 : tensor<1x512x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2699
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2697
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x28x28xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2766 = tensor.cast %2702 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %cast_2767 = tensor.cast %cast_2766 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2703 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2704 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2705 = torch.aten.item %2703 : !torch.vtensor<[],f32> -> !torch.float
    %2706 = torch_c.to_f64 %2705
    %2707 = torch.aten.item %2704 : !torch.vtensor<[],si8> -> !torch.int
    %2708 = torch_c.to_i64 %2707
    %cast_2768 = tensor.cast %cast_2767 : tensor<1x512x28x28xi8> to tensor<1x512x28x28xi8>
    %c1_2769 = arith.constant 1 : index
    %c1_2770 = arith.constant 1 : index
    %c512_2771 = arith.constant 512 : index
    %c2_2772 = arith.constant 2 : index
    %c28_2773 = arith.constant 28 : index
    %c3_2774 = arith.constant 3 : index
    %c28_2775 = arith.constant 28 : index
    %2709 = tensor.empty() : tensor<1x512x28x28xf32>
    %2710 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2768 : tensor<1x512x28x28xi8>) outs(%2709 : tensor<1x512x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2707
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2705
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x28x28xf32>
    %cast_2776 = tensor.cast %2710 : tensor<1x512x28x28xf32> to tensor<1x512x28x28xf32>
    // Quantize the 256x512x1x1 f32 tensor %98 (defined earlier in the file) to i8
    // and dequantize it back to f32, both with scale = 3.90625e-03 and
    // zero_point = 0.
    //   quantize:   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    //   dequantize: out = (sext(q) - zero_point) * scale
    // NOTE(review): presumably the filter of a following 1x1 convolution, which
    // is outside the visible part of the file — confirm.
    %2711 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2712 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_2777 is not consumed by any op visible here;
    // presumably a dtype code left over from the torch-level quantize op — confirm.
    %int12_2777 = torch.constant.int 12
    %2713 = torch.aten.item %2711 : !torch.vtensor<[],f32> -> !torch.float
    %2714 = torch_c.to_f64 %2713
    %2715 = torch.aten.item %2712 : !torch.vtensor<[],si8> -> !torch.int
    %2716 = torch_c.to_i64 %2715
    // Index constants emitted by the lowering; not used by the ops of this stage.
    %c1_2778 = arith.constant 1 : index
    %c0_2779 = arith.constant 0 : index
    %c256_2780 = arith.constant 256 : index
    %c1_2781 = arith.constant 1 : index
    %c512_2782 = arith.constant 512 : index
    %2717 = tensor.empty() : tensor<256x512x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0); the two trailing dims have size 1.
    %2718 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%98 : tensor<256x512x1x1xf32>) outs(%2717 : tensor<256x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2715
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2713
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x512x1x1xi8>
    // The tensor.cast ops in this stage have identical source and result types.
    %cast_2783 = tensor.cast %2718 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
    %cast_2784 = tensor.cast %cast_2783 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
    // Dequantize parameters (same literal values as the quantize step above).
    %2719 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2720 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2721 = torch.aten.item %2719 : !torch.vtensor<[],f32> -> !torch.float
    %2722 = torch_c.to_f64 %2721
    %2723 = torch.aten.item %2720 : !torch.vtensor<[],si8> -> !torch.int
    %2724 = torch_c.to_i64 %2723
    %cast_2785 = tensor.cast %cast_2784 : tensor<256x512x1x1xi8> to tensor<256x512x1x1xi8>
    %c1_2786 = arith.constant 1 : index
    %c0_2787 = arith.constant 0 : index
    %c256_2788 = arith.constant 256 : index
    %c1_2789 = arith.constant 1 : index
    %c512_2790 = arith.constant 512 : index
    %2725 = tensor.empty() : tensor<256x512x1x1xf32>
    %2726 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2785 : tensor<256x512x1x1xi8>) outs(%2725 : tensor<256x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2723
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2721
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x512x1x1xf32>
    %cast_2791 = tensor.cast %2726 : tensor<256x512x1x1xf32> to tensor<256x512x1x1xf32>
    // Quantize %100 (256 f32 values, defined before this chunk) to i8 using
    // scale 3.906250e-03 (= 2^-8) and zero point 0.
    %2727 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2728 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2792 = torch.constant.int 12
    %2729 = torch.aten.item %2727 : !torch.vtensor<[],f32> -> !torch.float
    %2730 = torch_c.to_f64 %2729
    %2731 = torch.aten.item %2728 : !torch.vtensor<[],si8> -> !torch.int
    %2732 = torch_c.to_i64 %2731
    %c1_2793 = arith.constant 1 : index
    %c0_2794 = arith.constant 0 : index
    %c256_2795 = arith.constant 256 : index
    %2733 = tensor.empty() : tensor<256xi8>
    // Element-wise over the single dimension:
    // out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    %2734 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%100 : tensor<256xf32>) outs(%2733 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2731
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2729
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Two chained same-type casts (no-ops).
    %cast_2796 = tensor.cast %2734 : tensor<256xi8> to tensor<256xi8>
    %cast_2797 = tensor.cast %cast_2796 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the i8 tensor %cast_2797 back to f32 with scale 3.906250e-03
    // (= 2^-8) and zero point 0. The result %cast_2802 is broadcast along the
    // channel dimension to initialise the convolution output further down.
    %2735 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2736 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2737 = torch.aten.item %2735 : !torch.vtensor<[],f32> -> !torch.float
    %2738 = torch_c.to_f64 %2737
    %2739 = torch.aten.item %2736 : !torch.vtensor<[],si8> -> !torch.int
    %2740 = torch_c.to_i64 %2739
    %cast_2798 = tensor.cast %cast_2797 : tensor<256xi8> to tensor<256xi8>
    %c1_2799 = arith.constant 1 : index
    %c0_2800 = arith.constant 0 : index
    %c256_2801 = arith.constant 256 : index
    %2741 = tensor.empty() : tensor<256xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2742 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2798 : tensor<256xi8>) outs(%2741 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2739
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2737
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_2802 = tensor.cast %2742 : tensor<256xf32> to tensor<256xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2776 (1x512x28x28)
    // with the 256x512x1x1 filter %cast_2791 and per-channel bias %cast_2802:
    // padding 0, stride 1, dilation 1, groups 1. Result: %cast_2851 (1x256x28x28).
    %int0_2803 = torch.constant.int 0
    %int0_2804 = torch.constant.int 0
    %int1_2805 = torch.constant.int 1
    %int1_2806 = torch.constant.int 1
    %int1_2807 = torch.constant.int 1
    %int1_2808 = torch.constant.int 1
    %int0_2809 = torch.constant.int 0
    // %int0_2803 / %int0_2804 are the padding amounts used below (via %2748 /
    // %2749). NOTE(review): the four lists themselves are not referenced in the
    // visible code; presumably they are leftovers of the original torch
    // convolution op's arguments - confirm.
    %2743 = torch.prim.ListConstruct %int0_2803, %int0_2804 : (!torch.int, !torch.int) -> !torch.list<int>
    %2744 = torch.prim.ListConstruct %int1_2805, %int1_2806 : (!torch.int, !torch.int) -> !torch.list<int>
    %2745 = torch.prim.ListConstruct %int1_2807, %int1_2808 : (!torch.int, !torch.int) -> !torch.list<int>
    %2746 = torch.prim.ListConstruct %int0_2809, %int0_2809 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2810 = torch.constant.bool false
    %int1_2811 = torch.constant.int 1
    // %2747 is the group count (1); %2748 / %2749 are the two padding amounts (0).
    %2747 = torch_c.to_i64 %int1_2811
    %2748 = torch_c.to_i64 %int0_2803
    %2749 = torch_c.to_i64 %int0_2804
    %2750 = torch_c.to_i64 %int0_2809
    %2751 = torch_c.to_i64 %int0_2809
    %c0_2812 = arith.constant 0 : index
    %c1_2813 = arith.constant 1 : index
    %c1_2814 = arith.constant 1 : index
    %c512_2815 = arith.constant 512 : index
    %c2_2816 = arith.constant 2 : index
    %c28_2817 = arith.constant 28 : index
    %c3_2818 = arith.constant 3 : index
    %c28_2819 = arith.constant 28 : index
    %c0_2820 = arith.constant 0 : index
    %c256_2821 = arith.constant 256 : index
    %c1_2822 = arith.constant 1 : index
    %c512_2823 = arith.constant 512 : index
    %c2_2824 = arith.constant 2 : index
    %c1_2825 = arith.constant 1 : index
    %c3_2826 = arith.constant 3 : index
    %c1_2827 = arith.constant 1 : index
    // Runtime checks: 512 (input channels) and 256 (filter count) must both be
    // divisible by the group count.
    %2752 = arith.index_cast %2747 : i64 to index
    %c0_2828 = arith.constant 0 : index
    %2753 = arith.remsi %c512_2815, %2752 : index
    %2754 = arith.cmpi eq, %c0_2828, %2753 : index
    cf.assert %2754, "invalid: groups must divide input channel size evenly."
    %c0_2829 = arith.constant 0 : index
    %2755 = arith.remsi %c256_2821, %2752 : index
    %2756 = arith.cmpi eq, %c0_2829, %2755 : index
    cf.assert %2756, "invalid: groups must divide weight batch size evenly."
    // In the size arithmetic below %c1_i64_2830 / %c1_i64_2831 multiply
    // (kernel - 1) and %c1_i64_2832 / %c1_i64_2833 are the divisors, i.e. they
    // take the dilation and stride positions of the usual output-size formula.
    %c1_i64_2830 = arith.constant 1 : i64
    %c1_i64_2831 = arith.constant 1 : i64
    %c1_i64_2832 = arith.constant 1 : i64
    %c1_i64_2833 = arith.constant 1 : i64
    %cst_2834 = arith.constant 0.000000e+00 : f32
    %c0_2835 = arith.constant 0 : index
    %c1_2836 = arith.constant 1 : index
    %c1_2837 = arith.constant 1 : index
    %c512_2838 = arith.constant 512 : index
    %c2_2839 = arith.constant 2 : index
    %c28_2840 = arith.constant 28 : index
    %c3_2841 = arith.constant 3 : index
    %c28_2842 = arith.constant 28 : index
    %c0_i64_2843 = arith.constant 0 : i64
    // Pad the input with 0.0: nothing on the first two dimensions, %2759 / %2760
    // (both 0 here) on the last two, on the low and the high side. The result
    // type is fully dynamic even though every pad amount is a constant.
    %2757 = arith.index_cast %c0_i64_2843 : i64 to index
    %2758 = arith.index_cast %c0_i64_2843 : i64 to index
    %2759 = arith.index_cast %2748 : i64 to index
    %2760 = arith.index_cast %2749 : i64 to index
    %padded_2844 = tensor.pad %cast_2776 low[%2757, %2758, %2759, %2760] high[%2757, %2758, %2759, %2760] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2834 : f32
    } : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
    // Output size of dimension 2:
    //   floordiv(28 + 2*pad - 1*(kernel - 1) - 1, 1) + 1, with kernel = 1, pad = 0
    // which evaluates to 28.
    %2761 = arith.index_cast %c1_2825 : index to i64
    %c1_i64_2845 = arith.constant 1 : i64
    %c2_i64_2846 = arith.constant 2 : i64
    %2762 = arith.muli %2748, %c2_i64_2846 : i64
    %2763 = arith.index_cast %c28_2817 : index to i64
    %2764 = arith.addi %2763, %2762 : i64
    %2765 = arith.subi %2761, %c1_i64_2845 : i64
    %2766 = arith.muli %c1_i64_2830, %2765 : i64
    %2767 = arith.subi %2764, %2766 : i64
    %2768 = arith.subi %2767, %c1_i64_2845 : i64
    %2769 = arith.floordivsi %2768, %c1_i64_2832 : i64
    %2770 = arith.addi %2769, %c1_i64_2845 : i64
    %2771 = arith.index_cast %2770 : i64 to index
    // Same computation for dimension 3 (also evaluates to 28).
    %2772 = arith.index_cast %c1_2827 : index to i64
    %c1_i64_2847 = arith.constant 1 : i64
    %c2_i64_2848 = arith.constant 2 : i64
    %2773 = arith.muli %2749, %c2_i64_2848 : i64
    %2774 = arith.index_cast %c28_2819 : index to i64
    %2775 = arith.addi %2774, %2773 : i64
    %2776 = arith.subi %2772, %c1_i64_2847 : i64
    %2777 = arith.muli %c1_i64_2831, %2776 : i64
    %2778 = arith.subi %2775, %2777 : i64
    %2779 = arith.subi %2778, %c1_i64_2847 : i64
    %2780 = arith.floordivsi %2779, %c1_i64_2833 : i64
    %2781 = arith.addi %2780, %c1_i64_2847 : i64
    %2782 = arith.index_cast %2781 : i64 to index
    // Initialise the accumulator by broadcasting the bias: #map3 reads
    // %cast_2802 at index d1 only, so each output channel starts at its bias.
    %2783 = tensor.empty(%2771, %2782) : tensor<1x256x?x?xf32>
    %2784 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2802 : tensor<256xf32>) outs(%2783 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // NOTE(review): %2785 / %2786 (channels per group) are not referenced in
    // the visible code.
    %2785 = arith.floordivsi %c512_2815, %2752 : index
    %2786 = arith.floordivsi %c256_2821, %2752 : index
    %c0_2849 = arith.constant 0 : index
    %c1_2850 = arith.constant 1 : index
    // The convolution itself, accumulating into the bias-initialised tensor.
    %2787 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_2844, %cast_2791 : tensor<?x?x?x?xf32>, tensor<256x512x1x1xf32>) outs(%2784 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Restore the static shape after the dynamically shaped computation.
    %cast_2851 = tensor.cast %2787 : tensor<1x256x?x?xf32> to tensor<1x256x28x28xf32>
    // ReLU over the convolution result %cast_2851: out = in > 0 ? in : 0.
    // NOTE(review): the index constants below are not referenced in the visible code.
    %c1_2852 = arith.constant 1 : index
    %c1_2853 = arith.constant 1 : index
    %c256_2854 = arith.constant 256 : index
    %c2_2855 = arith.constant 2 : index
    %c28_2856 = arith.constant 28 : index
    %c3_2857 = arith.constant 3 : index
    %c28_2858 = arith.constant 28 : index
    %2788 = tensor.empty() : tensor<1x256x28x28xf32>
    // #map pins the first input index to 0 (that dimension has size 1).
    %2789 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2851 : tensor<1x256x28x28xf32>) outs(%2788 : tensor<1x256x28x28xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is an unordered comparison: it is also true for NaN, so a NaN
      // input is passed through unchanged rather than replaced by 0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x28x28xf32>
    %cast_2859 = tensor.cast %2789 : tensor<1x256x28x28xf32> to tensor<1x256x28x28xf32>
    // Quantize the ReLU output %cast_2859 (1x256x28x28 f32) to i8 using
    // scale 1.562500e-02 (= 2^-6) and zero point 0.
    %2790 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2791 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2860 = torch.constant.int 12
    %2792 = torch.aten.item %2790 : !torch.vtensor<[],f32> -> !torch.float
    %2793 = torch_c.to_f64 %2792
    %2794 = torch.aten.item %2791 : !torch.vtensor<[],si8> -> !torch.int
    %2795 = torch_c.to_i64 %2794
    %c1_2861 = arith.constant 1 : index
    %c1_2862 = arith.constant 1 : index
    %c256_2863 = arith.constant 256 : index
    %c2_2864 = arith.constant 2 : index
    %c28_2865 = arith.constant 28 : index
    %c3_2866 = arith.constant 3 : index
    %c28_2867 = arith.constant 28 : index
    %2796 = tensor.empty() : tensor<1x256x28x28xi8>
    // Element-wise: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    %2797 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2859 : tensor<1x256x28x28xf32>) outs(%2796 : tensor<1x256x28x28xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2794
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2792
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x28x28xi8>
    // Two chained same-type casts (no-ops).
    %cast_2868 = tensor.cast %2797 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
    %cast_2869 = tensor.cast %cast_2868 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
    // Dequantize the i8 activation %cast_2869 back to f32 with scale
    // 1.562500e-02 (= 2^-6) and zero point 0. The result %cast_2878 is the
    // input that is padded and fed to the stride-2 convolution further down.
    %2798 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2799 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2800 = torch.aten.item %2798 : !torch.vtensor<[],f32> -> !torch.float
    %2801 = torch_c.to_f64 %2800
    %2802 = torch.aten.item %2799 : !torch.vtensor<[],si8> -> !torch.int
    %2803 = torch_c.to_i64 %2802
    %cast_2870 = tensor.cast %cast_2869 : tensor<1x256x28x28xi8> to tensor<1x256x28x28xi8>
    %c1_2871 = arith.constant 1 : index
    %c1_2872 = arith.constant 1 : index
    %c256_2873 = arith.constant 256 : index
    %c2_2874 = arith.constant 2 : index
    %c28_2875 = arith.constant 28 : index
    %c3_2876 = arith.constant 3 : index
    %c28_2877 = arith.constant 28 : index
    %2804 = tensor.empty() : tensor<1x256x28x28xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2805 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2870 : tensor<1x256x28x28xi8>) outs(%2804 : tensor<1x256x28x28xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2802
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2800
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x28x28xf32>
    %cast_2878 = tensor.cast %2805 : tensor<1x256x28x28xf32> to tensor<1x256x28x28xf32>
    // Quantize %102 (256x256x3x3 f32, defined before this chunk) to i8 using
    // scale 3.906250e-03 (= 2^-8) and zero point 0.
    %2806 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2807 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2879 = torch.constant.int 12
    %2808 = torch.aten.item %2806 : !torch.vtensor<[],f32> -> !torch.float
    %2809 = torch_c.to_f64 %2808
    %2810 = torch.aten.item %2807 : !torch.vtensor<[],si8> -> !torch.int
    %2811 = torch_c.to_i64 %2810
    %c1_2880 = arith.constant 1 : index
    %c0_2881 = arith.constant 0 : index
    %c256_2882 = arith.constant 256 : index
    %c1_2883 = arith.constant 1 : index
    %c256_2884 = arith.constant 256 : index
    %c2_2885 = arith.constant 2 : index
    %c3_2886 = arith.constant 3 : index
    %c3_2887 = arith.constant 3 : index
    %c3_2888 = arith.constant 3 : index
    %2812 = tensor.empty() : tensor<256x256x3x3xi8>
    // Element-wise (identity maps on both operands):
    // out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    %2813 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%102 : tensor<256x256x3x3xf32>) outs(%2812 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2810
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2808
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Two chained same-type casts (no-ops).
    %cast_2889 = tensor.cast %2813 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_2890 = tensor.cast %cast_2889 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize the i8 tensor %cast_2890 back to f32 with scale 3.906250e-03
    // (= 2^-8) and zero point 0. The result %cast_2901 is the filter operand of
    // the stride-2 linalg.conv_2d_nchw_fchw further down.
    %2814 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2815 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2816 = torch.aten.item %2814 : !torch.vtensor<[],f32> -> !torch.float
    %2817 = torch_c.to_f64 %2816
    %2818 = torch.aten.item %2815 : !torch.vtensor<[],si8> -> !torch.int
    %2819 = torch_c.to_i64 %2818
    %cast_2891 = tensor.cast %cast_2890 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %c1_2892 = arith.constant 1 : index
    %c0_2893 = arith.constant 0 : index
    %c256_2894 = arith.constant 256 : index
    %c1_2895 = arith.constant 1 : index
    %c256_2896 = arith.constant 256 : index
    %c2_2897 = arith.constant 2 : index
    %c3_2898 = arith.constant 3 : index
    %c3_2899 = arith.constant 3 : index
    %c3_2900 = arith.constant 3 : index
    %2820 = tensor.empty() : tensor<256x256x3x3xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2821 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2891 : tensor<256x256x3x3xi8>) outs(%2820 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2818
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2816
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    %cast_2901 = tensor.cast %2821 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize %104 (256 f32 values, defined before this chunk) to i8 using
    // scale 7.812500e-03 (= 2^-7) and zero point 0.
    %2822 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2823 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2902 = torch.constant.int 12
    %2824 = torch.aten.item %2822 : !torch.vtensor<[],f32> -> !torch.float
    %2825 = torch_c.to_f64 %2824
    %2826 = torch.aten.item %2823 : !torch.vtensor<[],si8> -> !torch.int
    %2827 = torch_c.to_i64 %2826
    %c1_2903 = arith.constant 1 : index
    %c0_2904 = arith.constant 0 : index
    %c256_2905 = arith.constant 256 : index
    %2828 = tensor.empty() : tensor<256xi8>
    // Element-wise over the single dimension:
    // out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    %2829 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%104 : tensor<256xf32>) outs(%2828 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2826
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2824
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Two chained same-type casts (no-ops).
    %cast_2906 = tensor.cast %2829 : tensor<256xi8> to tensor<256xi8>
    %cast_2907 = tensor.cast %cast_2906 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the i8 tensor %cast_2907 back to f32 with scale 7.812500e-03
    // (= 2^-7) and zero point 0. The result %cast_2912 is broadcast along the
    // channel dimension to initialise the stride-2 convolution output below.
    %2830 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2831 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2832 = torch.aten.item %2830 : !torch.vtensor<[],f32> -> !torch.float
    %2833 = torch_c.to_f64 %2832
    %2834 = torch.aten.item %2831 : !torch.vtensor<[],si8> -> !torch.int
    %2835 = torch_c.to_i64 %2834
    %cast_2908 = tensor.cast %cast_2907 : tensor<256xi8> to tensor<256xi8>
    %c1_2909 = arith.constant 1 : index
    %c0_2910 = arith.constant 0 : index
    %c256_2911 = arith.constant 256 : index
    %2836 = tensor.empty() : tensor<256xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2837 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_2908 : tensor<256xi8>) outs(%2836 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2834
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2832
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_2912 = tensor.cast %2837 : tensor<256xf32> to tensor<256xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2878 (1x256x28x28)
    // with the 256x256x3x3 filter %cast_2901 and per-channel bias %cast_2912:
    // padding 1, stride 2, dilation 1, groups 1. Result: %cast_2961 (1x256x14x14).
    %int1_2913 = torch.constant.int 1
    %int1_2914 = torch.constant.int 1
    %int1_2915 = torch.constant.int 1
    %int1_2916 = torch.constant.int 1
    %int2_2917 = torch.constant.int 2
    %int2_2918 = torch.constant.int 2
    %int0_2919 = torch.constant.int 0
    // %int1_2913 / %int1_2914 are the padding amounts used below (via %2843 /
    // %2844). NOTE(review): the four lists themselves are not referenced in the
    // visible code; presumably they are leftovers of the original torch
    // convolution op's arguments - confirm.
    %2838 = torch.prim.ListConstruct %int1_2913, %int1_2914 : (!torch.int, !torch.int) -> !torch.list<int>
    %2839 = torch.prim.ListConstruct %int1_2915, %int1_2916 : (!torch.int, !torch.int) -> !torch.list<int>
    %2840 = torch.prim.ListConstruct %int2_2917, %int2_2918 : (!torch.int, !torch.int) -> !torch.list<int>
    %2841 = torch.prim.ListConstruct %int0_2919, %int0_2919 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_2920 = torch.constant.bool false
    %int1_2921 = torch.constant.int 1
    // %2842 is the group count (1); %2843 / %2844 are the two padding amounts (1).
    %2842 = torch_c.to_i64 %int1_2921
    %2843 = torch_c.to_i64 %int1_2913
    %2844 = torch_c.to_i64 %int1_2914
    %2845 = torch_c.to_i64 %int0_2919
    %2846 = torch_c.to_i64 %int0_2919
    %c0_2922 = arith.constant 0 : index
    %c1_2923 = arith.constant 1 : index
    %c1_2924 = arith.constant 1 : index
    %c256_2925 = arith.constant 256 : index
    %c2_2926 = arith.constant 2 : index
    %c28_2927 = arith.constant 28 : index
    %c3_2928 = arith.constant 3 : index
    %c28_2929 = arith.constant 28 : index
    %c0_2930 = arith.constant 0 : index
    %c256_2931 = arith.constant 256 : index
    %c1_2932 = arith.constant 1 : index
    %c256_2933 = arith.constant 256 : index
    %c2_2934 = arith.constant 2 : index
    %c3_2935 = arith.constant 3 : index
    %c3_2936 = arith.constant 3 : index
    %c3_2937 = arith.constant 3 : index
    // Runtime checks: 256 (input channels) and 256 (filter count) must both be
    // divisible by the group count.
    %2847 = arith.index_cast %2842 : i64 to index
    %c0_2938 = arith.constant 0 : index
    %2848 = arith.remsi %c256_2925, %2847 : index
    %2849 = arith.cmpi eq, %c0_2938, %2848 : index
    cf.assert %2849, "invalid: groups must divide input channel size evenly."
    %c0_2939 = arith.constant 0 : index
    %2850 = arith.remsi %c256_2931, %2847 : index
    %2851 = arith.cmpi eq, %c0_2939, %2850 : index
    cf.assert %2851, "invalid: groups must divide weight batch size evenly."
    // In the size arithmetic below %c1_i64_2940 / %c1_i64_2941 multiply
    // (kernel - 1) (dilation = 1) and %c2_i64_2942 / %c2_i64_2943 are the
    // divisors (stride = 2), matching the attributes on the conv op.
    %c1_i64_2940 = arith.constant 1 : i64
    %c1_i64_2941 = arith.constant 1 : i64
    %c2_i64_2942 = arith.constant 2 : i64
    %c2_i64_2943 = arith.constant 2 : i64
    %cst_2944 = arith.constant 0.000000e+00 : f32
    %c0_2945 = arith.constant 0 : index
    %c1_2946 = arith.constant 1 : index
    %c1_2947 = arith.constant 1 : index
    %c256_2948 = arith.constant 256 : index
    %c2_2949 = arith.constant 2 : index
    %c28_2950 = arith.constant 28 : index
    %c3_2951 = arith.constant 3 : index
    %c28_2952 = arith.constant 28 : index
    %c0_i64_2953 = arith.constant 0 : i64
    // Pad the input with 0.0: nothing on the first two dimensions, one element
    // (%2854 / %2855) on the low and the high side of the last two dimensions.
    %2852 = arith.index_cast %c0_i64_2953 : i64 to index
    %2853 = arith.index_cast %c0_i64_2953 : i64 to index
    %2854 = arith.index_cast %2843 : i64 to index
    %2855 = arith.index_cast %2844 : i64 to index
    %padded_2954 = tensor.pad %cast_2878 low[%2852, %2853, %2854, %2855] high[%2852, %2853, %2854, %2855] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_2944 : f32
    } : tensor<1x256x28x28xf32> to tensor<?x?x?x?xf32>
    // Output size of dimension 2:
    //   floordiv(28 + 2*pad - 1*(kernel - 1) - 1, 2) + 1, with kernel = 3, pad = 1
    //   = floordiv(27, 2) + 1 = 14.
    %2856 = arith.index_cast %c3_2935 : index to i64
    %c1_i64_2955 = arith.constant 1 : i64
    %c2_i64_2956 = arith.constant 2 : i64
    %2857 = arith.muli %2843, %c2_i64_2956 : i64
    %2858 = arith.index_cast %c28_2927 : index to i64
    %2859 = arith.addi %2858, %2857 : i64
    %2860 = arith.subi %2856, %c1_i64_2955 : i64
    %2861 = arith.muli %c1_i64_2940, %2860 : i64
    %2862 = arith.subi %2859, %2861 : i64
    %2863 = arith.subi %2862, %c1_i64_2955 : i64
    %2864 = arith.floordivsi %2863, %c2_i64_2942 : i64
    %2865 = arith.addi %2864, %c1_i64_2955 : i64
    %2866 = arith.index_cast %2865 : i64 to index
    // Same computation for dimension 3 (also evaluates to 14).
    %2867 = arith.index_cast %c3_2937 : index to i64
    %c1_i64_2957 = arith.constant 1 : i64
    %c2_i64_2958 = arith.constant 2 : i64
    %2868 = arith.muli %2844, %c2_i64_2958 : i64
    %2869 = arith.index_cast %c28_2929 : index to i64
    %2870 = arith.addi %2869, %2868 : i64
    %2871 = arith.subi %2867, %c1_i64_2957 : i64
    %2872 = arith.muli %c1_i64_2941, %2871 : i64
    %2873 = arith.subi %2870, %2872 : i64
    %2874 = arith.subi %2873, %c1_i64_2957 : i64
    %2875 = arith.floordivsi %2874, %c2_i64_2943 : i64
    %2876 = arith.addi %2875, %c1_i64_2957 : i64
    %2877 = arith.index_cast %2876 : i64 to index
    // Initialise the accumulator by broadcasting the bias: #map3 reads
    // %cast_2912 at index d1 only, so each output channel starts at its bias.
    %2878 = tensor.empty(%2866, %2877) : tensor<1x256x?x?xf32>
    %2879 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2912 : tensor<256xf32>) outs(%2878 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // NOTE(review): %2880 / %2881 (channels per group) are not referenced in
    // the visible code.
    %2880 = arith.floordivsi %c256_2925, %2847 : index
    %2881 = arith.floordivsi %c256_2931, %2847 : index
    %c0_2959 = arith.constant 0 : index
    %c1_2960 = arith.constant 1 : index
    // The convolution itself, accumulating into the bias-initialised tensor.
    %2882 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_2954, %cast_2901 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%2879 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Restore the static shape after the dynamically shaped computation.
    %cast_2961 = tensor.cast %2882 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU over the convolution result %cast_2961: out = in > 0 ? in : 0.
    // NOTE(review): the index constants below are not referenced in the visible code.
    %c1_2962 = arith.constant 1 : index
    %c1_2963 = arith.constant 1 : index
    %c256_2964 = arith.constant 256 : index
    %c2_2965 = arith.constant 2 : index
    %c14 = arith.constant 14 : index
    %c3_2966 = arith.constant 3 : index
    %c14_2967 = arith.constant 14 : index
    %2883 = tensor.empty() : tensor<1x256x14x14xf32>
    // #map pins the first input index to 0 (that dimension has size 1).
    %2884 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2961 : tensor<1x256x14x14xf32>) outs(%2883 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is an unordered comparison: it is also true for NaN, so a NaN
      // input is passed through unchanged rather than replaced by 0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_2968 = tensor.cast %2884 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the ReLU output %cast_2968 (1x256x14x14 f32) to i8 using
    // scale 1.562500e-02 (= 2^-6) and zero point 0.
    %2885 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2886 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2969 = torch.constant.int 12
    %2887 = torch.aten.item %2885 : !torch.vtensor<[],f32> -> !torch.float
    %2888 = torch_c.to_f64 %2887
    %2889 = torch.aten.item %2886 : !torch.vtensor<[],si8> -> !torch.int
    %2890 = torch_c.to_i64 %2889
    %c1_2970 = arith.constant 1 : index
    %c1_2971 = arith.constant 1 : index
    %c256_2972 = arith.constant 256 : index
    %c2_2973 = arith.constant 2 : index
    %c14_2974 = arith.constant 14 : index
    %c3_2975 = arith.constant 3 : index
    %c14_2976 = arith.constant 14 : index
    %2891 = tensor.empty() : tensor<1x256x14x14xi8>
    // Element-wise: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    %2892 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2968 : tensor<1x256x14x14xf32>) outs(%2891 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2889
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2887
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Two chained same-type casts (no-ops).
    %cast_2977 = tensor.cast %2892 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_2978 = tensor.cast %cast_2977 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize the i8 activation %cast_2978 back to f32 with scale
    // 1.562500e-02 (= 2^-6) and zero point 0, producing %cast_2987.
    // NOTE(review): the consumer of %cast_2987 is outside the visible chunk.
    %2893 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2894 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2895 = torch.aten.item %2893 : !torch.vtensor<[],f32> -> !torch.float
    %2896 = torch_c.to_f64 %2895
    %2897 = torch.aten.item %2894 : !torch.vtensor<[],si8> -> !torch.int
    %2898 = torch_c.to_i64 %2897
    %cast_2979 = tensor.cast %cast_2978 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %c1_2980 = arith.constant 1 : index
    %c1_2981 = arith.constant 1 : index
    %c256_2982 = arith.constant 256 : index
    %c2_2983 = arith.constant 2 : index
    %c14_2984 = arith.constant 14 : index
    %c3_2985 = arith.constant 3 : index
    %c14_2986 = arith.constant 14 : index
    %2899 = tensor.empty() : tensor<1x256x14x14xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2900 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2979 : tensor<1x256x14x14xi8>) outs(%2899 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2897
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2895
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_2987 = tensor.cast %2900 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %106 (1024x256x1x1 f32, defined before this chunk) to i8 using
    // scale 3.906250e-03 (= 2^-8) and zero point 0.
    // NOTE(review): presumably the filter of the next convolution, whose use
    // lies outside the visible chunk - confirm.
    %2901 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2902 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_2988 = torch.constant.int 12
    %2903 = torch.aten.item %2901 : !torch.vtensor<[],f32> -> !torch.float
    %2904 = torch_c.to_f64 %2903
    %2905 = torch.aten.item %2902 : !torch.vtensor<[],si8> -> !torch.int
    %2906 = torch_c.to_i64 %2905
    %c1_2989 = arith.constant 1 : index
    %c0_2990 = arith.constant 0 : index
    %c1024 = arith.constant 1024 : index
    %c1_2991 = arith.constant 1 : index
    %c256_2992 = arith.constant 256 : index
    %2907 = tensor.empty() : tensor<1024x256x1x1xi8>
    // Element-wise: out = fptosi(clamp(round(in / scale) + zero_point, -128, 127)).
    // The input is read through #map4, i.e. at (d0, d1, 0, 0).
    %2908 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%106 : tensor<1024x256x1x1xf32>) outs(%2907 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %2905
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %2903
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]; the unordered predicates send NaN to 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    // Two chained same-type casts (no-ops).
    %cast_2993 = tensor.cast %2908 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_2994 = tensor.cast %cast_2993 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize the i8 tensor %cast_2994 back to f32 with scale 3.906250e-03
    // (= 2^-8) and zero point 0, producing %cast_3001.
    // NOTE(review): the consumer of %cast_3001 is outside the visible chunk.
    %2909 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2910 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2911 = torch.aten.item %2909 : !torch.vtensor<[],f32> -> !torch.float
    %2912 = torch_c.to_f64 %2911
    %2913 = torch.aten.item %2910 : !torch.vtensor<[],si8> -> !torch.int
    %2914 = torch_c.to_i64 %2913
    %cast_2995 = tensor.cast %cast_2994 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %c1_2996 = arith.constant 1 : index
    %c0_2997 = arith.constant 0 : index
    %c1024_2998 = arith.constant 1024 : index
    %c1_2999 = arith.constant 1 : index
    %c256_3000 = arith.constant 256 : index
    %2915 = tensor.empty() : tensor<1024x256x1x1xf32>
    // Element-wise: out = sitofp(sext_i32(in) - trunc_i32(zero_point)) * scale.
    %2916 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_2995 : tensor<1024x256x1x1xi8>) outs(%2915 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2913
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %2911
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    %cast_3001 = tensor.cast %2916 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize -> dequantize round trip of %108 (1024 f32, defined outside this
    // excerpt). The dequantized result %cast_3012 is the input of the broadcast
    // linalg.generic (%2974) that initialises the output of the conv at %2977.
    // Quantize parameters: scale 7.8125e-03 (= 2^-7), zero point 0.
    %2917 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2918 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; 12 is presumably a torch dtype code
    // (qint8) carried over from the original quantize op -- TODO confirm.
    %int12_3002 = torch.constant.int 12
    // Scalar scale / zero point. The region below converts %2919 / %2921 itself;
    // %2920 and %2922 are not referenced in the visible lines.
    %2919 = torch.aten.item %2917 : !torch.vtensor<[],f32> -> !torch.float
    %2920 = torch_c.to_f64 %2919
    %2921 = torch.aten.item %2918 : !torch.vtensor<[],si8> -> !torch.int
    %2922 = torch_c.to_i64 %2921
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3003 = arith.constant 1 : index
    %c0_3004 = arith.constant 0 : index
    %c1024_3005 = arith.constant 1024 : index
    // Elementwise f32 -> i8 quantization over the single dimension (#map2 is identity).
    %2923 = tensor.empty() : tensor<1024xi8>
    %2924 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%108 : tensor<1024xf32>) outs(%2923 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2921
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2919
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(in / scale) + zero_point.
      // NOTE(review): math.round breaks ties away from zero; if the source op
      // specifies round-half-to-even, math.roundeven would be the match -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. ult/ugt are unordered predicates, so a NaN
      // satisfies both comparisons and the second select leaves it at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Two back-to-back no-op casts of the quantized tensor.
    %cast_3006 = tensor.cast %2924 : tensor<1024xi8> to tensor<1024xi8>
    %cast_3007 = tensor.cast %cast_3006 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize parameters: a separate pair of literals holding the same values
    // as the quantize pair (scale 2^-7, zero point 0). %2928 and %2930 are not
    // referenced in the visible lines.
    %2925 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2926 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2927 = torch.aten.item %2925 : !torch.vtensor<[],f32> -> !torch.float
    %2928 = torch_c.to_f64 %2927
    %2929 = torch.aten.item %2926 : !torch.vtensor<[],si8> -> !torch.int
    %2930 = torch_c.to_i64 %2929
    %cast_3008 = tensor.cast %cast_3007 : tensor<1024xi8> to tensor<1024xi8>
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3009 = arith.constant 1 : index
    %c0_3010 = arith.constant 0 : index
    %c1024_3011 = arith.constant 1024 : index
    // Elementwise i8 -> f32 dequantization: (in - zero_point) * scale.
    %2931 = tensor.empty() : tensor<1024xf32>
    %2932 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3008 : tensor<1024xi8>) outs(%2931 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2929
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %2927
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    // No-op cast; %cast_3012 is the dequantized 1024-element vector.
    %cast_3012 = tensor.cast %2932 : tensor<1024xf32> to tensor<1024xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2987 (1x256x14x14) with
    // the 1x1 filter %cast_3001 (1024x256x1x1), strides 1 and dilations 1. The
    // output is pre-filled with the per-channel vector %cast_3012 and the result
    // is cast to the static shape 1x1024x14x14 as %cast_3061.
    //
    // Torch-level scalar constants and lists. The four lists and the bool are not
    // referenced in the visible lines; presumably they are the padding, dilation,
    // stride and output-padding lists plus the `transposed` flag of the original
    // torch convolution -- TODO confirm.
    %int0_3013 = torch.constant.int 0
    %int0_3014 = torch.constant.int 0
    %int1_3015 = torch.constant.int 1
    %int1_3016 = torch.constant.int 1
    %int1_3017 = torch.constant.int 1
    %int1_3018 = torch.constant.int 1
    %int0_3019 = torch.constant.int 0
    %2933 = torch.prim.ListConstruct %int0_3013, %int0_3014 : (!torch.int, !torch.int) -> !torch.list<int>
    %2934 = torch.prim.ListConstruct %int1_3015, %int1_3016 : (!torch.int, !torch.int) -> !torch.list<int>
    %2935 = torch.prim.ListConstruct %int1_3017, %int1_3018 : (!torch.int, !torch.int) -> !torch.list<int>
    %2936 = torch.prim.ListConstruct %int0_3019, %int0_3019 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3020 = torch.constant.bool false
    %int1_3021 = torch.constant.int 1
    // %2937 (= 1) is the group count checked by the assertions below.
    // %2938 / %2939 (= 0) are the pad amounts for dims 2 and 3.
    // %2940 / %2941 are not referenced in the visible lines.
    %2937 = torch_c.to_i64 %int1_3021
    %2938 = torch_c.to_i64 %int0_3013
    %2939 = torch_c.to_i64 %int0_3014
    %2940 = torch_c.to_i64 %int0_3019
    %2941 = torch_c.to_i64 %int0_3019
    // Index constants emitted by the lowering. Within the visible lines only
    // %c256_3025, %c14_3027, %c14_3029, %c1024_3031, %c1_3035 and %c1_3037 are used.
    %c0_3022 = arith.constant 0 : index
    %c1_3023 = arith.constant 1 : index
    %c1_3024 = arith.constant 1 : index
    %c256_3025 = arith.constant 256 : index
    %c2_3026 = arith.constant 2 : index
    %c14_3027 = arith.constant 14 : index
    %c3_3028 = arith.constant 3 : index
    %c14_3029 = arith.constant 14 : index
    %c0_3030 = arith.constant 0 : index
    %c1024_3031 = arith.constant 1024 : index
    %c1_3032 = arith.constant 1 : index
    %c256_3033 = arith.constant 256 : index
    %c2_3034 = arith.constant 2 : index
    %c1_3035 = arith.constant 1 : index
    %c3_3036 = arith.constant 3 : index
    %c1_3037 = arith.constant 1 : index
    // Runtime checks: 256 % groups == 0 and 1024 % groups == 0.
    %2942 = arith.index_cast %2937 : i64 to index
    %c0_3038 = arith.constant 0 : index
    %2943 = arith.remsi %c256_3025, %2942 : index
    %2944 = arith.cmpi eq, %c0_3038, %2943 : index
    cf.assert %2944, "invalid: groups must divide input channel size evenly."
    %c0_3039 = arith.constant 0 : index
    %2945 = arith.remsi %c1024_3031, %2942 : index
    %2946 = arith.cmpi eq, %c0_3039, %2945 : index
    cf.assert %2946, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: %c1_i64_3040 /
    // %c1_i64_3041 are the multipliers of (kernel - 1) and %c1_i64_3042 /
    // %c1_i64_3043 are the divisors; both pairs equal the dilations / strides
    // attributes (1) of the conv op.
    %c1_i64_3040 = arith.constant 1 : i64
    %c1_i64_3041 = arith.constant 1 : i64
    %c1_i64_3042 = arith.constant 1 : i64
    %c1_i64_3043 = arith.constant 1 : i64
    // Padding value.
    %cst_3044 = arith.constant 0.000000e+00 : f32
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c0_3045 = arith.constant 0 : index
    %c1_3046 = arith.constant 1 : index
    %c1_3047 = arith.constant 1 : index
    %c256_3048 = arith.constant 256 : index
    %c2_3049 = arith.constant 2 : index
    %c14_3050 = arith.constant 14 : index
    %c3_3051 = arith.constant 3 : index
    %c14_3052 = arith.constant 14 : index
    // Pad the input with %cst_3044. Every low/high amount is 0 here (dims 0 and 1
    // from %c0_i64_3053, dims 2 and 3 from %2938 / %2939), so no elements are
    // added, but the result type becomes fully dynamic.
    %c0_i64_3053 = arith.constant 0 : i64
    %2947 = arith.index_cast %c0_i64_3053 : i64 to index
    %2948 = arith.index_cast %c0_i64_3053 : i64 to index
    %2949 = arith.index_cast %2938 : i64 to index
    %2950 = arith.index_cast %2939 : i64 to index
    %padded_3054 = tensor.pad %cast_2987 low[%2947, %2948, %2949, %2950] high[%2947, %2948, %2949, %2950] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3044 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(14 + 2*pad - mult*(kernel - 1) - 1, div) + 1
    // with kernel = %c1_3035, pad = %2938, mult = %c1_i64_3040, div = %c1_i64_3042.
    %2951 = arith.index_cast %c1_3035 : index to i64
    %c1_i64_3055 = arith.constant 1 : i64
    %c2_i64_3056 = arith.constant 2 : i64
    %2952 = arith.muli %2938, %c2_i64_3056 : i64
    %2953 = arith.index_cast %c14_3027 : index to i64
    %2954 = arith.addi %2953, %2952 : i64
    %2955 = arith.subi %2951, %c1_i64_3055 : i64
    %2956 = arith.muli %c1_i64_3040, %2955 : i64
    %2957 = arith.subi %2954, %2956 : i64
    %2958 = arith.subi %2957, %c1_i64_3055 : i64
    %2959 = arith.floordivsi %2958, %c1_i64_3042 : i64
    %2960 = arith.addi %2959, %c1_i64_3055 : i64
    %2961 = arith.index_cast %2960 : i64 to index
    // Same computation for dim 3 (kernel = %c1_3037, pad = %2939,
    // mult = %c1_i64_3041, div = %c1_i64_3043).
    %2962 = arith.index_cast %c1_3037 : index to i64
    %c1_i64_3057 = arith.constant 1 : i64
    %c2_i64_3058 = arith.constant 2 : i64
    %2963 = arith.muli %2939, %c2_i64_3058 : i64
    %2964 = arith.index_cast %c14_3029 : index to i64
    %2965 = arith.addi %2964, %2963 : i64
    %2966 = arith.subi %2962, %c1_i64_3057 : i64
    %2967 = arith.muli %c1_i64_3041, %2966 : i64
    %2968 = arith.subi %2965, %2967 : i64
    %2969 = arith.subi %2968, %c1_i64_3057 : i64
    %2970 = arith.floordivsi %2969, %c1_i64_3043 : i64
    %2971 = arith.addi %2970, %c1_i64_3057 : i64
    %2972 = arith.index_cast %2971 : i64 to index
    // Allocate the output with the computed extents and fill it by broadcasting
    // %cast_3012 along dim 1 (#map3 indexes the input with d1 only).
    %2973 = tensor.empty(%2961, %2972) : tensor<1x1024x?x?xf32>
    %2974 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3012 : tensor<1024xf32>) outs(%2973 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // Channel counts divided by the group count; %2975, %2976 and the two index
    // constants after them are not referenced in the visible lines.
    %2975 = arith.floordivsi %c256_3025, %2942 : index
    %2976 = arith.floordivsi %c1024_3031, %2942 : index
    %c0_3059 = arith.constant 0 : index
    %c1_3060 = arith.constant 1 : index
    // The convolution itself; the pre-filled tensor %2974 is its outs operand.
    %2977 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3054, %cast_3001 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%2974 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Cast the dynamic spatial dims back to the static 14x14.
    %cast_3061 = tensor.cast %2977 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize -> dequantize round trip of the conv result %cast_3061
    // (1x1024x14x14 f32). The dequantized result %cast_3080 is the first input of
    // the elementwise add that produces %3092.
    // Quantize parameters: scale 7.8125e-03 (= 2^-7), zero point 0.
    %2978 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2979 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; 12 is presumably a torch dtype code
    // (qint8) carried over from the original quantize op -- TODO confirm.
    %int12_3062 = torch.constant.int 12
    // Scalar scale / zero point. The region below converts %2980 / %2982 itself;
    // %2981 and %2983 are not referenced in the visible lines.
    %2980 = torch.aten.item %2978 : !torch.vtensor<[],f32> -> !torch.float
    %2981 = torch_c.to_f64 %2980
    %2982 = torch.aten.item %2979 : !torch.vtensor<[],si8> -> !torch.int
    %2983 = torch_c.to_i64 %2982
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3063 = arith.constant 1 : index
    %c1_3064 = arith.constant 1 : index
    %c1024_3065 = arith.constant 1024 : index
    %c2_3066 = arith.constant 2 : index
    %c14_3067 = arith.constant 14 : index
    %c3_3068 = arith.constant 3 : index
    %c14_3069 = arith.constant 14 : index
    // Elementwise f32 -> i8 quantization. #map reads the input at (0, d1, d2, d3).
    %2984 = tensor.empty() : tensor<1x1024x14x14xi8>
    %2985 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3061 : tensor<1x1024x14x14xf32>) outs(%2984 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2982
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2980
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(in / scale) + zero_point.
      // NOTE(review): math.round breaks ties away from zero; if the source op
      // specifies round-half-to-even, math.roundeven would be the match -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. ult/ugt are unordered predicates, so a NaN
      // satisfies both comparisons and the second select leaves it at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Two back-to-back no-op casts of the quantized tensor.
    %cast_3070 = tensor.cast %2985 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3071 = tensor.cast %cast_3070 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize parameters: a separate pair of literals holding the same values
    // as the quantize pair (scale 2^-7, zero point 0). %2989 and %2991 are not
    // referenced in the visible lines.
    %2986 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2987 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %2988 = torch.aten.item %2986 : !torch.vtensor<[],f32> -> !torch.float
    %2989 = torch_c.to_f64 %2988
    %2990 = torch.aten.item %2987 : !torch.vtensor<[],si8> -> !torch.int
    %2991 = torch_c.to_i64 %2990
    %cast_3072 = tensor.cast %cast_3071 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3073 = arith.constant 1 : index
    %c1_3074 = arith.constant 1 : index
    %c1024_3075 = arith.constant 1024 : index
    %c2_3076 = arith.constant 2 : index
    %c14_3077 = arith.constant 14 : index
    %c3_3078 = arith.constant 3 : index
    %c14_3079 = arith.constant 14 : index
    // Elementwise i8 -> f32 dequantization: (in - zero_point) * scale.
    %2992 = tensor.empty() : tensor<1x1024x14x14xf32>
    %2993 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3072 : tensor<1x1024x14x14xi8>) outs(%2992 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %2990
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %2988
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    // No-op cast; %cast_3080 is the dequantized activation.
    %cast_3080 = tensor.cast %2993 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize -> dequantize round trip of %110 (1024x512x1x1 f32, defined outside
    // this excerpt). The dequantized result %cast_3095 is the filter operand of
    // the linalg.conv_2d_nchw_fchw that produces %3070.
    // Quantize parameters: scale 3.90625e-03 (= 2^-8), zero point 0.
    %2994 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %2995 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; 12 is presumably a torch dtype code
    // (qint8) carried over from the original quantize op -- TODO confirm.
    %int12_3081 = torch.constant.int 12
    // Scalar scale / zero point. The region below converts %2996 / %2998 itself;
    // %2997 and %2999 are not referenced in the visible lines.
    %2996 = torch.aten.item %2994 : !torch.vtensor<[],f32> -> !torch.float
    %2997 = torch_c.to_f64 %2996
    %2998 = torch.aten.item %2995 : !torch.vtensor<[],si8> -> !torch.int
    %2999 = torch_c.to_i64 %2998
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3082 = arith.constant 1 : index
    %c0_3083 = arith.constant 0 : index
    %c1024_3084 = arith.constant 1024 : index
    %c1_3085 = arith.constant 1 : index
    %c512_3086 = arith.constant 512 : index
    // Elementwise f32 -> i8 quantization. #map4 reads the input at (d0, d1, 0, 0).
    %3000 = tensor.empty() : tensor<1024x512x1x1xi8>
    %3001 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%110 : tensor<1024x512x1x1xf32>) outs(%3000 : tensor<1024x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %2998
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %2996
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(in / scale) + zero_point.
      // NOTE(review): math.round breaks ties away from zero; if the source op
      // specifies round-half-to-even, math.roundeven would be the match -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. ult/ugt are unordered predicates, so a NaN
      // satisfies both comparisons and the second select leaves it at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x512x1x1xi8>
    // Two back-to-back no-op casts of the quantized tensor.
    %cast_3087 = tensor.cast %3001 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
    %cast_3088 = tensor.cast %cast_3087 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
    // Dequantize parameters: a separate pair of literals holding the same values
    // as the quantize pair (scale 2^-8, zero point 0). %3005 and %3007 are not
    // referenced in the visible lines.
    %3002 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3003 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3004 = torch.aten.item %3002 : !torch.vtensor<[],f32> -> !torch.float
    %3005 = torch_c.to_f64 %3004
    %3006 = torch.aten.item %3003 : !torch.vtensor<[],si8> -> !torch.int
    %3007 = torch_c.to_i64 %3006
    %cast_3089 = tensor.cast %cast_3088 : tensor<1024x512x1x1xi8> to tensor<1024x512x1x1xi8>
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3090 = arith.constant 1 : index
    %c0_3091 = arith.constant 0 : index
    %c1024_3092 = arith.constant 1024 : index
    %c1_3093 = arith.constant 1 : index
    %c512_3094 = arith.constant 512 : index
    // Elementwise i8 -> f32 dequantization: (in - zero_point) * scale.
    %3008 = tensor.empty() : tensor<1024x512x1x1xf32>
    %3009 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3089 : tensor<1024x512x1x1xi8>) outs(%3008 : tensor<1024x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3006
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3004
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x512x1x1xf32>
    // No-op cast; %cast_3095 is the dequantized filter.
    %cast_3095 = tensor.cast %3009 : tensor<1024x512x1x1xf32> to tensor<1024x512x1x1xf32>
    // Quantize -> dequantize round trip of %112 (1024 f32, defined outside this
    // excerpt). The dequantized result %cast_3106 is the input of the broadcast
    // linalg.generic (%3067) that initialises the output of the conv at %3070.
    // Quantize parameters: scale 3.90625e-03 (= 2^-8), zero point 0.
    %3010 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3011 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; 12 is presumably a torch dtype code
    // (qint8) carried over from the original quantize op -- TODO confirm.
    %int12_3096 = torch.constant.int 12
    // Scalar scale / zero point. The region below converts %3012 / %3014 itself;
    // %3013 and %3015 are not referenced in the visible lines.
    %3012 = torch.aten.item %3010 : !torch.vtensor<[],f32> -> !torch.float
    %3013 = torch_c.to_f64 %3012
    %3014 = torch.aten.item %3011 : !torch.vtensor<[],si8> -> !torch.int
    %3015 = torch_c.to_i64 %3014
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3097 = arith.constant 1 : index
    %c0_3098 = arith.constant 0 : index
    %c1024_3099 = arith.constant 1024 : index
    // Elementwise f32 -> i8 quantization over the single dimension (#map2 is identity).
    %3016 = tensor.empty() : tensor<1024xi8>
    %3017 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%112 : tensor<1024xf32>) outs(%3016 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3014
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3012
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(in / scale) + zero_point.
      // NOTE(review): math.round breaks ties away from zero; if the source op
      // specifies round-half-to-even, math.roundeven would be the match -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. ult/ugt are unordered predicates, so a NaN
      // satisfies both comparisons and the second select leaves it at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Two back-to-back no-op casts of the quantized tensor.
    %cast_3100 = tensor.cast %3017 : tensor<1024xi8> to tensor<1024xi8>
    %cast_3101 = tensor.cast %cast_3100 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize parameters: a separate pair of literals holding the same values
    // as the quantize pair (scale 2^-8, zero point 0). %3021 and %3023 are not
    // referenced in the visible lines.
    %3018 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3019 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3020 = torch.aten.item %3018 : !torch.vtensor<[],f32> -> !torch.float
    %3021 = torch_c.to_f64 %3020
    %3022 = torch.aten.item %3019 : !torch.vtensor<[],si8> -> !torch.int
    %3023 = torch_c.to_i64 %3022
    %cast_3102 = tensor.cast %cast_3101 : tensor<1024xi8> to tensor<1024xi8>
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3103 = arith.constant 1 : index
    %c0_3104 = arith.constant 0 : index
    %c1024_3105 = arith.constant 1024 : index
    // Elementwise i8 -> f32 dequantization: (in - zero_point) * scale.
    %3024 = tensor.empty() : tensor<1024xf32>
    %3025 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3102 : tensor<1024xi8>) outs(%3024 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3022
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3020
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    // No-op cast; %cast_3106 is the dequantized 1024-element vector.
    %cast_3106 = tensor.cast %3025 : tensor<1024xf32> to tensor<1024xf32>
    // 2-D convolution (NCHW input, FCHW filter) of %cast_2776 (1x512x28x28,
    // defined outside this excerpt) with the 1x1 filter %cast_3095
    // (1024x512x1x1), strides 2 and dilations 1. The output is pre-filled with
    // the per-channel vector %cast_3106 and the result is cast to the static
    // shape 1x1024x14x14 as %cast_3155.
    //
    // Torch-level scalar constants and lists. The four lists and the bool are not
    // referenced in the visible lines; presumably they are the padding, dilation,
    // stride and output-padding lists plus the `transposed` flag of the original
    // torch convolution -- TODO confirm.
    %int0_3107 = torch.constant.int 0
    %int0_3108 = torch.constant.int 0
    %int1_3109 = torch.constant.int 1
    %int1_3110 = torch.constant.int 1
    %int2_3111 = torch.constant.int 2
    %int2_3112 = torch.constant.int 2
    %int0_3113 = torch.constant.int 0
    %3026 = torch.prim.ListConstruct %int0_3107, %int0_3108 : (!torch.int, !torch.int) -> !torch.list<int>
    %3027 = torch.prim.ListConstruct %int1_3109, %int1_3110 : (!torch.int, !torch.int) -> !torch.list<int>
    %3028 = torch.prim.ListConstruct %int2_3111, %int2_3112 : (!torch.int, !torch.int) -> !torch.list<int>
    %3029 = torch.prim.ListConstruct %int0_3113, %int0_3113 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3114 = torch.constant.bool false
    %int1_3115 = torch.constant.int 1
    // %3030 (= 1) is the group count checked by the assertions below.
    // %3031 / %3032 (= 0) are the pad amounts for dims 2 and 3.
    // %3033 / %3034 are not referenced in the visible lines.
    %3030 = torch_c.to_i64 %int1_3115
    %3031 = torch_c.to_i64 %int0_3107
    %3032 = torch_c.to_i64 %int0_3108
    %3033 = torch_c.to_i64 %int0_3113
    %3034 = torch_c.to_i64 %int0_3113
    // Index constants emitted by the lowering. Within the visible lines only
    // %c512_3119, %c28_3121, %c28_3123, %c1024_3125, %c1_3129 and %c1_3131 are used.
    %c0_3116 = arith.constant 0 : index
    %c1_3117 = arith.constant 1 : index
    %c1_3118 = arith.constant 1 : index
    %c512_3119 = arith.constant 512 : index
    %c2_3120 = arith.constant 2 : index
    %c28_3121 = arith.constant 28 : index
    %c3_3122 = arith.constant 3 : index
    %c28_3123 = arith.constant 28 : index
    %c0_3124 = arith.constant 0 : index
    %c1024_3125 = arith.constant 1024 : index
    %c1_3126 = arith.constant 1 : index
    %c512_3127 = arith.constant 512 : index
    %c2_3128 = arith.constant 2 : index
    %c1_3129 = arith.constant 1 : index
    %c3_3130 = arith.constant 3 : index
    %c1_3131 = arith.constant 1 : index
    // Runtime checks: 512 % groups == 0 and 1024 % groups == 0.
    %3035 = arith.index_cast %3030 : i64 to index
    %c0_3132 = arith.constant 0 : index
    %3036 = arith.remsi %c512_3119, %3035 : index
    %3037 = arith.cmpi eq, %c0_3132, %3036 : index
    cf.assert %3037, "invalid: groups must divide input channel size evenly."
    %c0_3133 = arith.constant 0 : index
    %3038 = arith.remsi %c1024_3125, %3035 : index
    %3039 = arith.cmpi eq, %c0_3133, %3038 : index
    cf.assert %3039, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: %c1_i64_3134 /
    // %c1_i64_3135 (= 1) multiply (kernel - 1) and equal the conv's dilations
    // attribute; %c2_i64_3136 / %c2_i64_3137 (= 2) are the divisors and equal
    // the conv's strides attribute.
    %c1_i64_3134 = arith.constant 1 : i64
    %c1_i64_3135 = arith.constant 1 : i64
    %c2_i64_3136 = arith.constant 2 : i64
    %c2_i64_3137 = arith.constant 2 : i64
    // Padding value.
    %cst_3138 = arith.constant 0.000000e+00 : f32
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c0_3139 = arith.constant 0 : index
    %c1_3140 = arith.constant 1 : index
    %c1_3141 = arith.constant 1 : index
    %c512_3142 = arith.constant 512 : index
    %c2_3143 = arith.constant 2 : index
    %c28_3144 = arith.constant 28 : index
    %c3_3145 = arith.constant 3 : index
    %c28_3146 = arith.constant 28 : index
    // Pad the input with %cst_3138. Every low/high amount is 0 here (dims 0 and 1
    // from %c0_i64_3147, dims 2 and 3 from %3031 / %3032), so no elements are
    // added, but the result type becomes fully dynamic.
    %c0_i64_3147 = arith.constant 0 : i64
    %3040 = arith.index_cast %c0_i64_3147 : i64 to index
    %3041 = arith.index_cast %c0_i64_3147 : i64 to index
    %3042 = arith.index_cast %3031 : i64 to index
    %3043 = arith.index_cast %3032 : i64 to index
    %padded_3148 = tensor.pad %cast_2776 low[%3040, %3041, %3042, %3043] high[%3040, %3041, %3042, %3043] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3138 : f32
    } : tensor<1x512x28x28xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(28 + 2*pad - mult*(kernel - 1) - 1, div) + 1
    // with kernel = %c1_3129, pad = %3031, mult = %c1_i64_3134, div = %c2_i64_3136;
    // with the constants above this evaluates to floordiv(27, 2) + 1 = 14.
    %3044 = arith.index_cast %c1_3129 : index to i64
    %c1_i64_3149 = arith.constant 1 : i64
    %c2_i64_3150 = arith.constant 2 : i64
    %3045 = arith.muli %3031, %c2_i64_3150 : i64
    %3046 = arith.index_cast %c28_3121 : index to i64
    %3047 = arith.addi %3046, %3045 : i64
    %3048 = arith.subi %3044, %c1_i64_3149 : i64
    %3049 = arith.muli %c1_i64_3134, %3048 : i64
    %3050 = arith.subi %3047, %3049 : i64
    %3051 = arith.subi %3050, %c1_i64_3149 : i64
    %3052 = arith.floordivsi %3051, %c2_i64_3136 : i64
    %3053 = arith.addi %3052, %c1_i64_3149 : i64
    %3054 = arith.index_cast %3053 : i64 to index
    // Same computation for dim 3 (kernel = %c1_3131, pad = %3032,
    // mult = %c1_i64_3135, div = %c2_i64_3137).
    %3055 = arith.index_cast %c1_3131 : index to i64
    %c1_i64_3151 = arith.constant 1 : i64
    %c2_i64_3152 = arith.constant 2 : i64
    %3056 = arith.muli %3032, %c2_i64_3152 : i64
    %3057 = arith.index_cast %c28_3123 : index to i64
    %3058 = arith.addi %3057, %3056 : i64
    %3059 = arith.subi %3055, %c1_i64_3151 : i64
    %3060 = arith.muli %c1_i64_3135, %3059 : i64
    %3061 = arith.subi %3058, %3060 : i64
    %3062 = arith.subi %3061, %c1_i64_3151 : i64
    %3063 = arith.floordivsi %3062, %c2_i64_3137 : i64
    %3064 = arith.addi %3063, %c1_i64_3151 : i64
    %3065 = arith.index_cast %3064 : i64 to index
    // Allocate the output with the computed extents and fill it by broadcasting
    // %cast_3106 along dim 1 (#map3 indexes the input with d1 only).
    %3066 = tensor.empty(%3054, %3065) : tensor<1x1024x?x?xf32>
    %3067 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3106 : tensor<1024xf32>) outs(%3066 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // Channel counts divided by the group count; %3068, %3069 and the two index
    // constants after them are not referenced in the visible lines.
    %3068 = arith.floordivsi %c512_3119, %3035 : index
    %3069 = arith.floordivsi %c1024_3125, %3035 : index
    %c0_3153 = arith.constant 0 : index
    %c1_3154 = arith.constant 1 : index
    // The convolution itself; the pre-filled tensor %3067 is its outs operand.
    %3070 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_3148, %cast_3095 : tensor<?x?x?x?xf32>, tensor<1024x512x1x1xf32>) outs(%3067 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Cast the dynamic spatial dims back to the static 14x14.
    %cast_3155 = tensor.cast %3070 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize -> dequantize round trip of the conv result %cast_3155
    // (1x1024x14x14 f32). The dequantized result %cast_3174 is the second input
    // of the elementwise add that produces %3092.
    // Quantize parameters: scale 7.8125e-03 (= 2^-7), zero point 0.
    %3071 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3072 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible lines; 12 is presumably a torch dtype code
    // (qint8) carried over from the original quantize op -- TODO confirm.
    %int12_3156 = torch.constant.int 12
    // Scalar scale / zero point. The region below converts %3073 / %3075 itself;
    // %3074 and %3076 are not referenced in the visible lines.
    %3073 = torch.aten.item %3071 : !torch.vtensor<[],f32> -> !torch.float
    %3074 = torch_c.to_f64 %3073
    %3075 = torch.aten.item %3072 : !torch.vtensor<[],si8> -> !torch.int
    %3076 = torch_c.to_i64 %3075
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3157 = arith.constant 1 : index
    %c1_3158 = arith.constant 1 : index
    %c1024_3159 = arith.constant 1024 : index
    %c2_3160 = arith.constant 2 : index
    %c14_3161 = arith.constant 14 : index
    %c3_3162 = arith.constant 3 : index
    %c14_3163 = arith.constant 14 : index
    // Elementwise f32 -> i8 quantization. #map reads the input at (0, d1, d2, d3).
    %3077 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3078 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3155 : tensor<1x1024x14x14xf32>) outs(%3077 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3075
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3073
      %5777 = arith.truncf %5776 : f64 to f32
      // q = round(in / scale) + zero_point.
      // NOTE(review): math.round breaks ties away from zero; if the source op
      // specifies round-half-to-even, math.roundeven would be the match -- confirm.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127]. ult/ugt are unordered predicates, so a NaN
      // satisfies both comparisons and the second select leaves it at 127.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      // Convert the clamped value to a signed 8-bit integer.
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Two back-to-back no-op casts of the quantized tensor.
    %cast_3164 = tensor.cast %3078 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3165 = tensor.cast %cast_3164 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize parameters: a separate pair of literals holding the same values
    // as the quantize pair (scale 2^-7, zero point 0). %3082 and %3084 are not
    // referenced in the visible lines.
    %3079 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3080 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3081 = torch.aten.item %3079 : !torch.vtensor<[],f32> -> !torch.float
    %3082 = torch_c.to_f64 %3081
    %3083 = torch.aten.item %3080 : !torch.vtensor<[],si8> -> !torch.int
    %3084 = torch_c.to_i64 %3083
    %cast_3166 = tensor.cast %cast_3165 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3167 = arith.constant 1 : index
    %c1_3168 = arith.constant 1 : index
    %c1024_3169 = arith.constant 1024 : index
    %c2_3170 = arith.constant 2 : index
    %c14_3171 = arith.constant 14 : index
    %c3_3172 = arith.constant 3 : index
    %c14_3173 = arith.constant 14 : index
    // Elementwise i8 -> f32 dequantization: (in - zero_point) * scale.
    %3085 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3086 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3166 : tensor<1x1024x14x14xi8>) outs(%3085 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3083
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3081
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    // No-op cast; %cast_3174 is the dequantized activation.
    %cast_3174 = tensor.cast %3086 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Elementwise add of the two 1x1024x14x14 f32 tensors %cast_3080 and
    // %cast_3174: result = %cast_3080 + %cast_3174 * alpha, with alpha = 1.
    // NOTE(review): given the file name this looks like the residual add of a
    // ResNet block (main path + strided 1x1 shortcut) -- confirm.
    //
    // alpha as i64; converted to f32 inside the region.
    %int1_3175 = torch.constant.int 1
    %3087 = torch_c.to_i64 %int1_3175
    // Index constants emitted by the lowering; the ones not used by the
    // comparisons below are not referenced in the visible lines.
    %c1_3176 = arith.constant 1 : index
    %c1_3177 = arith.constant 1 : index
    %c1024_3178 = arith.constant 1024 : index
    %c2_3179 = arith.constant 2 : index
    %c14_3180 = arith.constant 14 : index
    %c3_3181 = arith.constant 3 : index
    %c14_3182 = arith.constant 14 : index
    %c1_3183 = arith.constant 1 : index
    %c1024_3184 = arith.constant 1024 : index
    // Broadcast-compatibility checks on dims 1, 2 and 3. Both sides of every
    // comparison are constants with equal values (1024 == 1024, 14 == 14).
    %3088 = arith.cmpi eq, %c1024_3178, %c1024_3184 : index
    cf.assert %3088, "mismatched size for broadcast"
    %c2_3185 = arith.constant 2 : index
    %c14_3186 = arith.constant 14 : index
    %3089 = arith.cmpi eq, %c14_3180, %c14_3186 : index
    cf.assert %3089, "mismatched size for broadcast"
    %c3_3187 = arith.constant 3 : index
    %c14_3188 = arith.constant 14 : index
    %3090 = arith.cmpi eq, %c14_3182, %c14_3188 : index
    cf.assert %3090, "mismatched size for broadcast"
    // Both inputs are read through #map, i.e. at (0, d1, d2, d3).
    %3091 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3092 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3080, %cast_3174 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%3091 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // out = in + in_6197 * float(alpha)
      %5774 = arith.sitofp %3087 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    // No-op cast of the sum.
    %cast_3189 = tensor.cast %3092 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Elementwise ReLU of %cast_3189 (1x1024x14x14 f32): out = in > 0 ? in : 0.
    //
    // Index constants emitted by the lowering; none is referenced in the visible lines.
    %c1_3190 = arith.constant 1 : index
    %c1_3191 = arith.constant 1 : index
    %c1024_3192 = arith.constant 1024 : index
    %c2_3193 = arith.constant 2 : index
    %c14_3194 = arith.constant 14 : index
    %c3_3195 = arith.constant 3 : index
    %c14_3196 = arith.constant 14 : index
    %3093 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3094 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3189 : tensor<1x1024x14x14xf32>) outs(%3093 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so a NaN input compares true and is
      // passed through unchanged rather than being replaced by 0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    // No-op cast of the activation.
    %cast_3197 = tensor.cast %3094 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the ReLU output %cast_3197 to i8 with scale 1.5625e-02 (2^-6)
    // and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3095 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3096 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3198 = torch.constant.int 12
    %3097 = torch.aten.item %3095 : !torch.vtensor<[],f32> -> !torch.float
    %3098 = torch_c.to_f64 %3097
    %3099 = torch.aten.item %3096 : !torch.vtensor<[],si8> -> !torch.int
    %3100 = torch_c.to_i64 %3099
    // The index constants below are not referenced within this block.
    %c1_3199 = arith.constant 1 : index
    %c1_3200 = arith.constant 1 : index
    %c1024_3201 = arith.constant 1024 : index
    %c2_3202 = arith.constant 2 : index
    %c14_3203 = arith.constant 14 : index
    %c3_3204 = arith.constant 3 : index
    %c14_3205 = arith.constant 14 : index
    %3101 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3102 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3197 : tensor<1x1024x14x14xf32>) outs(%3101 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3099
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3097
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Casts between identical types.
    %cast_3206 = tensor.cast %3102 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3207 = tensor.cast %cast_3206 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_3207 back to f32 with scale 1.5625e-02 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3103 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3104 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3105 = torch.aten.item %3103 : !torch.vtensor<[],f32> -> !torch.float
    %3106 = torch_c.to_f64 %3105
    %3107 = torch.aten.item %3104 : !torch.vtensor<[],si8> -> !torch.int
    %3108 = torch_c.to_i64 %3107
    %cast_3208 = tensor.cast %cast_3207 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // The index constants below are not referenced within this block.
    %c1_3209 = arith.constant 1 : index
    %c1_3210 = arith.constant 1 : index
    %c1024_3211 = arith.constant 1024 : index
    %c2_3212 = arith.constant 2 : index
    %c14_3213 = arith.constant 14 : index
    %c3_3214 = arith.constant 3 : index
    %c14_3215 = arith.constant 14 : index
    %3109 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3110 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3208 : tensor<1x1024x14x14xi8>) outs(%3109 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3107
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3105
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3216 = tensor.cast %3110 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the f32 tensor %114 (256x1024x1x1) to i8 with scale 0.001953125
    // (2^-9) and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3111 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3112 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3217 = torch.constant.int 12
    %3113 = torch.aten.item %3111 : !torch.vtensor<[],f32> -> !torch.float
    %3114 = torch_c.to_f64 %3113
    %3115 = torch.aten.item %3112 : !torch.vtensor<[],si8> -> !torch.int
    %3116 = torch_c.to_i64 %3115
    // The index constants below are not referenced within this block.
    %c1_3218 = arith.constant 1 : index
    %c0_3219 = arith.constant 0 : index
    %c256_3220 = arith.constant 256 : index
    %c1_3221 = arith.constant 1 : index
    %c1024_3222 = arith.constant 1024 : index
    %3117 = tensor.empty() : tensor<256x1024x1x1xi8>
    // #map4 reads the input at constant index 0 on the two trailing unit dims.
    %3118 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%114 : tensor<256x1024x1x1xf32>) outs(%3117 : tensor<256x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3115
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3113
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x1024x1x1xi8>
    // Casts between identical types.
    %cast_3223 = tensor.cast %3118 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %cast_3224 = tensor.cast %cast_3223 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Dequantize %cast_3224 back to f32 with scale 0.001953125 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3119 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3120 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3121 = torch.aten.item %3119 : !torch.vtensor<[],f32> -> !torch.float
    %3122 = torch_c.to_f64 %3121
    %3123 = torch.aten.item %3120 : !torch.vtensor<[],si8> -> !torch.int
    %3124 = torch_c.to_i64 %3123
    %cast_3225 = tensor.cast %cast_3224 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // The index constants below are not referenced within this block.
    %c1_3226 = arith.constant 1 : index
    %c0_3227 = arith.constant 0 : index
    %c256_3228 = arith.constant 256 : index
    %c1_3229 = arith.constant 1 : index
    %c1024_3230 = arith.constant 1024 : index
    %3125 = tensor.empty() : tensor<256x1024x1x1xf32>
    %3126 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3225 : tensor<256x1024x1x1xi8>) outs(%3125 : tensor<256x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3123
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3121
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x1024x1x1xf32>
    %cast_3231 = tensor.cast %3126 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
    // Quantize the f32 vector %116 (256 elements) to i8 with scale 7.8125e-03
    // (2^-7) and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3127 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3128 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3232 = torch.constant.int 12
    %3129 = torch.aten.item %3127 : !torch.vtensor<[],f32> -> !torch.float
    %3130 = torch_c.to_f64 %3129
    %3131 = torch.aten.item %3128 : !torch.vtensor<[],si8> -> !torch.int
    %3132 = torch_c.to_i64 %3131
    // The index constants below are not referenced within this block.
    %c1_3233 = arith.constant 1 : index
    %c0_3234 = arith.constant 0 : index
    %c256_3235 = arith.constant 256 : index
    %3133 = tensor.empty() : tensor<256xi8>
    %3134 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%116 : tensor<256xf32>) outs(%3133 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3131
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3129
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Casts between identical types.
    %cast_3236 = tensor.cast %3134 : tensor<256xi8> to tensor<256xi8>
    %cast_3237 = tensor.cast %cast_3236 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_3237 back to f32 with scale 7.8125e-03 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3135 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3136 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3137 = torch.aten.item %3135 : !torch.vtensor<[],f32> -> !torch.float
    %3138 = torch_c.to_f64 %3137
    %3139 = torch.aten.item %3136 : !torch.vtensor<[],si8> -> !torch.int
    %3140 = torch_c.to_i64 %3139
    %cast_3238 = tensor.cast %cast_3237 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced within this block.
    %c1_3239 = arith.constant 1 : index
    %c0_3240 = arith.constant 0 : index
    %c256_3241 = arith.constant 256 : index
    %3141 = tensor.empty() : tensor<256xf32>
    %3142 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3238 : tensor<256xi8>) outs(%3141 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3139
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3137
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_3242 = tensor.cast %3142 : tensor<256xf32> to tensor<256xf32>
    // 1x1 convolution of %cast_3216 (1x1024x14x14) with filter %cast_3231
    // (256x1024x1x1). The accumulator is pre-filled with %cast_3242 broadcast
    // along the channel dim, and the result is cast to 1x256x14x14.
    %int0_3243 = torch.constant.int 0
    %int0_3244 = torch.constant.int 0
    %int1_3245 = torch.constant.int 1
    %int1_3246 = torch.constant.int 1
    %int1_3247 = torch.constant.int 1
    %int1_3248 = torch.constant.int 1
    %int0_3249 = torch.constant.int 0
    // The lists themselves are not consumed in this block; only their scalar
    // elements are. %int0_3243/%int0_3244 become the H/W pad amounts below.
    // NOTE(review): the other lists are presumably stride / dilation / output padding — confirm.
    %3143 = torch.prim.ListConstruct %int0_3243, %int0_3244 : (!torch.int, !torch.int) -> !torch.list<int>
    %3144 = torch.prim.ListConstruct %int1_3245, %int1_3246 : (!torch.int, !torch.int) -> !torch.list<int>
    %3145 = torch.prim.ListConstruct %int1_3247, %int1_3248 : (!torch.int, !torch.int) -> !torch.list<int>
    %3146 = torch.prim.ListConstruct %int0_3249, %int0_3249 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3250 = torch.constant.bool false
    // %int1_3251 is the group count checked by the assertions below.
    %int1_3251 = torch.constant.int 1
    %3147 = torch_c.to_i64 %int1_3251
    %3148 = torch_c.to_i64 %int0_3243
    %3149 = torch_c.to_i64 %int0_3244
    %3150 = torch_c.to_i64 %int0_3249
    %3151 = torch_c.to_i64 %int0_3249
    %c0_3252 = arith.constant 0 : index
    %c1_3253 = arith.constant 1 : index
    %c1_3254 = arith.constant 1 : index
    %c1024_3255 = arith.constant 1024 : index
    %c2_3256 = arith.constant 2 : index
    %c14_3257 = arith.constant 14 : index
    %c3_3258 = arith.constant 3 : index
    %c14_3259 = arith.constant 14 : index
    %c0_3260 = arith.constant 0 : index
    %c256_3261 = arith.constant 256 : index
    %c1_3262 = arith.constant 1 : index
    %c1024_3263 = arith.constant 1024 : index
    %c2_3264 = arith.constant 2 : index
    %c1_3265 = arith.constant 1 : index
    %c3_3266 = arith.constant 3 : index
    %c1_3267 = arith.constant 1 : index
    // Runtime checks that 1024 and 256 are both divisible by the group count.
    %3152 = arith.index_cast %3147 : i64 to index
    %c0_3268 = arith.constant 0 : index
    %3153 = arith.remsi %c1024_3255, %3152 : index
    %3154 = arith.cmpi eq, %c0_3268, %3153 : index
    cf.assert %3154, "invalid: groups must divide input channel size evenly."
    %c0_3269 = arith.constant 0 : index
    %3155 = arith.remsi %c256_3261, %3152 : index
    %3156 = arith.cmpi eq, %c0_3269, %3155 : index
    cf.assert %3156, "invalid: groups must divide weight batch size evenly."
    %c1_i64_3270 = arith.constant 1 : i64
    %c1_i64_3271 = arith.constant 1 : i64
    %c1_i64_3272 = arith.constant 1 : i64
    %c1_i64_3273 = arith.constant 1 : i64
    %cst_3274 = arith.constant 0.000000e+00 : f32
    %c0_3275 = arith.constant 0 : index
    %c1_3276 = arith.constant 1 : index
    %c1_3277 = arith.constant 1 : index
    %c1024_3278 = arith.constant 1024 : index
    %c2_3279 = arith.constant 2 : index
    %c14_3280 = arith.constant 14 : index
    %c3_3281 = arith.constant 3 : index
    %c14_3282 = arith.constant 14 : index
    %c0_i64_3283 = arith.constant 0 : i64
    // Pad with 0.0: none on dims 0/1, %3148/%3149 (both 0 here) on dims 2/3.
    // The result type is fully dynamic.
    %3157 = arith.index_cast %c0_i64_3283 : i64 to index
    %3158 = arith.index_cast %c0_i64_3283 : i64 to index
    %3159 = arith.index_cast %3148 : i64 to index
    %3160 = arith.index_cast %3149 : i64 to index
    %padded_3284 = tensor.pad %cast_3216 low[%3157, %3158, %3159, %3160] high[%3157, %3158, %3159, %3160] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3274 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output size on dim 2: floordiv(14 + 2*%3148 - %c1_i64_3270*(1 - 1) - 1, %c1_i64_3272) + 1.
    // NOTE(review): %c1_i64_3270 is presumably the dilation and %c1_i64_3272 the stride — confirm.
    %3161 = arith.index_cast %c1_3265 : index to i64
    %c1_i64_3285 = arith.constant 1 : i64
    %c2_i64_3286 = arith.constant 2 : i64
    %3162 = arith.muli %3148, %c2_i64_3286 : i64
    %3163 = arith.index_cast %c14_3257 : index to i64
    %3164 = arith.addi %3163, %3162 : i64
    %3165 = arith.subi %3161, %c1_i64_3285 : i64
    %3166 = arith.muli %c1_i64_3270, %3165 : i64
    %3167 = arith.subi %3164, %3166 : i64
    %3168 = arith.subi %3167, %c1_i64_3285 : i64
    %3169 = arith.floordivsi %3168, %c1_i64_3272 : i64
    %3170 = arith.addi %3169, %c1_i64_3285 : i64
    %3171 = arith.index_cast %3170 : i64 to index
    // Same computation for dim 3, using %3149, %c1_i64_3271 and %c1_i64_3273.
    %3172 = arith.index_cast %c1_3267 : index to i64
    %c1_i64_3287 = arith.constant 1 : i64
    %c2_i64_3288 = arith.constant 2 : i64
    %3173 = arith.muli %3149, %c2_i64_3288 : i64
    %3174 = arith.index_cast %c14_3259 : index to i64
    %3175 = arith.addi %3174, %3173 : i64
    %3176 = arith.subi %3172, %c1_i64_3287 : i64
    %3177 = arith.muli %c1_i64_3271, %3176 : i64
    %3178 = arith.subi %3175, %3177 : i64
    %3179 = arith.subi %3178, %c1_i64_3287 : i64
    %3180 = arith.floordivsi %3179, %c1_i64_3273 : i64
    %3181 = arith.addi %3180, %c1_i64_3287 : i64
    %3182 = arith.index_cast %3181 : i64 to index
    // Fill the accumulator by broadcasting the 256-element vector along dim 1 (#map3).
    %3183 = tensor.empty(%3171, %3182) : tensor<1x256x?x?xf32>
    %3184 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3242 : tensor<256xf32>) outs(%3183 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %3185 and %3186 are not referenced within this block.
    %3185 = arith.floordivsi %c1024_3255, %3152 : index
    %3186 = arith.floordivsi %c256_3261, %3152 : index
    %c0_3289 = arith.constant 0 : index
    %c1_3290 = arith.constant 1 : index
    // Stride 1, dilation 1; %3184 is the init operand the convolution writes into.
    %3187 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3284, %cast_3231 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3184 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    %cast_3291 = tensor.cast %3187 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU over %cast_3291: an element is kept when it compares `ugt` 0.0
    // (greater than, or unordered) and is replaced by 0.0 otherwise.
    // The index constants below are not referenced within this block.
    %c1_3292 = arith.constant 1 : index
    %c1_3293 = arith.constant 1 : index
    %c256_3294 = arith.constant 256 : index
    %c2_3295 = arith.constant 2 : index
    %c14_3296 = arith.constant 14 : index
    %c3_3297 = arith.constant 3 : index
    %c14_3298 = arith.constant 14 : index
    %3188 = tensor.empty() : tensor<1x256x14x14xf32>
    %3189 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3291 : tensor<1x256x14x14xf32>) outs(%3188 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Cast between identical types.
    %cast_3299 = tensor.cast %3189 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the ReLU output %cast_3299 to i8 with scale 7.8125e-03 (2^-7)
    // and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3190 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3191 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3300 = torch.constant.int 12
    %3192 = torch.aten.item %3190 : !torch.vtensor<[],f32> -> !torch.float
    %3193 = torch_c.to_f64 %3192
    %3194 = torch.aten.item %3191 : !torch.vtensor<[],si8> -> !torch.int
    %3195 = torch_c.to_i64 %3194
    // The index constants below are not referenced within this block.
    %c1_3301 = arith.constant 1 : index
    %c1_3302 = arith.constant 1 : index
    %c256_3303 = arith.constant 256 : index
    %c2_3304 = arith.constant 2 : index
    %c14_3305 = arith.constant 14 : index
    %c3_3306 = arith.constant 3 : index
    %c14_3307 = arith.constant 14 : index
    %3196 = tensor.empty() : tensor<1x256x14x14xi8>
    %3197 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3299 : tensor<1x256x14x14xf32>) outs(%3196 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3194
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3192
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Casts between identical types.
    %cast_3308 = tensor.cast %3197 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_3309 = tensor.cast %cast_3308 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_3309 back to f32 with scale 7.8125e-03 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3198 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3199 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3200 = torch.aten.item %3198 : !torch.vtensor<[],f32> -> !torch.float
    %3201 = torch_c.to_f64 %3200
    %3202 = torch.aten.item %3199 : !torch.vtensor<[],si8> -> !torch.int
    %3203 = torch_c.to_i64 %3202
    %cast_3310 = tensor.cast %cast_3309 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // The index constants below are not referenced within this block.
    %c1_3311 = arith.constant 1 : index
    %c1_3312 = arith.constant 1 : index
    %c256_3313 = arith.constant 256 : index
    %c2_3314 = arith.constant 2 : index
    %c14_3315 = arith.constant 14 : index
    %c3_3316 = arith.constant 3 : index
    %c14_3317 = arith.constant 14 : index
    %3204 = tensor.empty() : tensor<1x256x14x14xf32>
    %3205 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3310 : tensor<1x256x14x14xi8>) outs(%3204 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3202
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3200
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_3318 = tensor.cast %3205 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 tensor %118 (256x256x3x3) to i8 with scale 3.90625e-03
    // (2^-8) and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3206 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3207 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3319 = torch.constant.int 12
    %3208 = torch.aten.item %3206 : !torch.vtensor<[],f32> -> !torch.float
    %3209 = torch_c.to_f64 %3208
    %3210 = torch.aten.item %3207 : !torch.vtensor<[],si8> -> !torch.int
    %3211 = torch_c.to_i64 %3210
    // The index constants below are not referenced within this block.
    %c1_3320 = arith.constant 1 : index
    %c0_3321 = arith.constant 0 : index
    %c256_3322 = arith.constant 256 : index
    %c1_3323 = arith.constant 1 : index
    %c256_3324 = arith.constant 256 : index
    %c2_3325 = arith.constant 2 : index
    %c3_3326 = arith.constant 3 : index
    %c3_3327 = arith.constant 3 : index
    %c3_3328 = arith.constant 3 : index
    %3212 = tensor.empty() : tensor<256x256x3x3xi8>
    %3213 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%118 : tensor<256x256x3x3xf32>) outs(%3212 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3210
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3208
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Casts between identical types.
    %cast_3329 = tensor.cast %3213 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_3330 = tensor.cast %cast_3329 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize %cast_3330 back to f32 with scale 3.90625e-03 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3214 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3215 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3216 = torch.aten.item %3214 : !torch.vtensor<[],f32> -> !torch.float
    %3217 = torch_c.to_f64 %3216
    %3218 = torch.aten.item %3215 : !torch.vtensor<[],si8> -> !torch.int
    %3219 = torch_c.to_i64 %3218
    %cast_3331 = tensor.cast %cast_3330 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // The index constants below are not referenced within this block.
    %c1_3332 = arith.constant 1 : index
    %c0_3333 = arith.constant 0 : index
    %c256_3334 = arith.constant 256 : index
    %c1_3335 = arith.constant 1 : index
    %c256_3336 = arith.constant 256 : index
    %c2_3337 = arith.constant 2 : index
    %c3_3338 = arith.constant 3 : index
    %c3_3339 = arith.constant 3 : index
    %c3_3340 = arith.constant 3 : index
    %3220 = tensor.empty() : tensor<256x256x3x3xf32>
    %3221 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3331 : tensor<256x256x3x3xi8>) outs(%3220 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3218
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3216
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    %cast_3341 = tensor.cast %3221 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize the f32 vector %120 (256 elements) to i8 with scale 1.5625e-02
    // (2^-6) and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3222 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3223 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3342 = torch.constant.int 12
    %3224 = torch.aten.item %3222 : !torch.vtensor<[],f32> -> !torch.float
    %3225 = torch_c.to_f64 %3224
    %3226 = torch.aten.item %3223 : !torch.vtensor<[],si8> -> !torch.int
    %3227 = torch_c.to_i64 %3226
    // The index constants below are not referenced within this block.
    %c1_3343 = arith.constant 1 : index
    %c0_3344 = arith.constant 0 : index
    %c256_3345 = arith.constant 256 : index
    %3228 = tensor.empty() : tensor<256xi8>
    %3229 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%120 : tensor<256xf32>) outs(%3228 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3226
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3224
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Casts between identical types.
    %cast_3346 = tensor.cast %3229 : tensor<256xi8> to tensor<256xi8>
    %cast_3347 = tensor.cast %cast_3346 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_3347 back to f32 with scale 1.5625e-02 and zero point 0:
    // x = (sext(q) - zp) * scale.
    %3230 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3231 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3232 = torch.aten.item %3230 : !torch.vtensor<[],f32> -> !torch.float
    %3233 = torch_c.to_f64 %3232
    %3234 = torch.aten.item %3231 : !torch.vtensor<[],si8> -> !torch.int
    %3235 = torch_c.to_i64 %3234
    %cast_3348 = tensor.cast %cast_3347 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced within this block.
    %c1_3349 = arith.constant 1 : index
    %c0_3350 = arith.constant 0 : index
    %c256_3351 = arith.constant 256 : index
    %3236 = tensor.empty() : tensor<256xf32>
    %3237 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3348 : tensor<256xi8>) outs(%3236 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3234
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3232
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_3352 = tensor.cast %3237 : tensor<256xf32> to tensor<256xf32>
    // 3x3 convolution of %cast_3318 (1x256x14x14) with filter %cast_3341
    // (256x256x3x3), zero-padded by 1 on each side of dims 2 and 3. The
    // accumulator is pre-filled with %cast_3352 broadcast along the channel
    // dim, and the result is cast to 1x256x14x14.
    %int1_3353 = torch.constant.int 1
    %int1_3354 = torch.constant.int 1
    %int1_3355 = torch.constant.int 1
    %int1_3356 = torch.constant.int 1
    %int1_3357 = torch.constant.int 1
    %int1_3358 = torch.constant.int 1
    %int0_3359 = torch.constant.int 0
    // The lists themselves are not consumed in this block; only their scalar
    // elements are. %int1_3353/%int1_3354 become the H/W pad amounts below.
    // NOTE(review): the other lists are presumably stride / dilation / output padding — confirm.
    %3238 = torch.prim.ListConstruct %int1_3353, %int1_3354 : (!torch.int, !torch.int) -> !torch.list<int>
    %3239 = torch.prim.ListConstruct %int1_3355, %int1_3356 : (!torch.int, !torch.int) -> !torch.list<int>
    %3240 = torch.prim.ListConstruct %int1_3357, %int1_3358 : (!torch.int, !torch.int) -> !torch.list<int>
    %3241 = torch.prim.ListConstruct %int0_3359, %int0_3359 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3360 = torch.constant.bool false
    // %int1_3361 is the group count checked by the assertions below.
    %int1_3361 = torch.constant.int 1
    %3242 = torch_c.to_i64 %int1_3361
    %3243 = torch_c.to_i64 %int1_3353
    %3244 = torch_c.to_i64 %int1_3354
    %3245 = torch_c.to_i64 %int0_3359
    %3246 = torch_c.to_i64 %int0_3359
    %c0_3362 = arith.constant 0 : index
    %c1_3363 = arith.constant 1 : index
    %c1_3364 = arith.constant 1 : index
    %c256_3365 = arith.constant 256 : index
    %c2_3366 = arith.constant 2 : index
    %c14_3367 = arith.constant 14 : index
    %c3_3368 = arith.constant 3 : index
    %c14_3369 = arith.constant 14 : index
    %c0_3370 = arith.constant 0 : index
    %c256_3371 = arith.constant 256 : index
    %c1_3372 = arith.constant 1 : index
    %c256_3373 = arith.constant 256 : index
    %c2_3374 = arith.constant 2 : index
    %c3_3375 = arith.constant 3 : index
    %c3_3376 = arith.constant 3 : index
    %c3_3377 = arith.constant 3 : index
    // Runtime checks that both 256-sized channel dims are divisible by the group count.
    %3247 = arith.index_cast %3242 : i64 to index
    %c0_3378 = arith.constant 0 : index
    %3248 = arith.remsi %c256_3365, %3247 : index
    %3249 = arith.cmpi eq, %c0_3378, %3248 : index
    cf.assert %3249, "invalid: groups must divide input channel size evenly."
    %c0_3379 = arith.constant 0 : index
    %3250 = arith.remsi %c256_3371, %3247 : index
    %3251 = arith.cmpi eq, %c0_3379, %3250 : index
    cf.assert %3251, "invalid: groups must divide weight batch size evenly."
    %c1_i64_3380 = arith.constant 1 : i64
    %c1_i64_3381 = arith.constant 1 : i64
    %c1_i64_3382 = arith.constant 1 : i64
    %c1_i64_3383 = arith.constant 1 : i64
    %cst_3384 = arith.constant 0.000000e+00 : f32
    %c0_3385 = arith.constant 0 : index
    %c1_3386 = arith.constant 1 : index
    %c1_3387 = arith.constant 1 : index
    %c256_3388 = arith.constant 256 : index
    %c2_3389 = arith.constant 2 : index
    %c14_3390 = arith.constant 14 : index
    %c3_3391 = arith.constant 3 : index
    %c14_3392 = arith.constant 14 : index
    %c0_i64_3393 = arith.constant 0 : i64
    // Pad with 0.0: none on dims 0/1, %3243/%3244 (both 1 here) on dims 2/3.
    // The result type is fully dynamic.
    %3252 = arith.index_cast %c0_i64_3393 : i64 to index
    %3253 = arith.index_cast %c0_i64_3393 : i64 to index
    %3254 = arith.index_cast %3243 : i64 to index
    %3255 = arith.index_cast %3244 : i64 to index
    %padded_3394 = tensor.pad %cast_3318 low[%3252, %3253, %3254, %3255] high[%3252, %3253, %3254, %3255] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3384 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output size on dim 2: floordiv(14 + 2*%3243 - %c1_i64_3380*(3 - 1) - 1, %c1_i64_3382) + 1.
    // NOTE(review): %c1_i64_3380 is presumably the dilation and %c1_i64_3382 the stride — confirm.
    %3256 = arith.index_cast %c3_3375 : index to i64
    %c1_i64_3395 = arith.constant 1 : i64
    %c2_i64_3396 = arith.constant 2 : i64
    %3257 = arith.muli %3243, %c2_i64_3396 : i64
    %3258 = arith.index_cast %c14_3367 : index to i64
    %3259 = arith.addi %3258, %3257 : i64
    %3260 = arith.subi %3256, %c1_i64_3395 : i64
    %3261 = arith.muli %c1_i64_3380, %3260 : i64
    %3262 = arith.subi %3259, %3261 : i64
    %3263 = arith.subi %3262, %c1_i64_3395 : i64
    %3264 = arith.floordivsi %3263, %c1_i64_3382 : i64
    %3265 = arith.addi %3264, %c1_i64_3395 : i64
    %3266 = arith.index_cast %3265 : i64 to index
    // Same computation for dim 3, using %3244, %c1_i64_3381 and %c1_i64_3383.
    %3267 = arith.index_cast %c3_3377 : index to i64
    %c1_i64_3397 = arith.constant 1 : i64
    %c2_i64_3398 = arith.constant 2 : i64
    %3268 = arith.muli %3244, %c2_i64_3398 : i64
    %3269 = arith.index_cast %c14_3369 : index to i64
    %3270 = arith.addi %3269, %3268 : i64
    %3271 = arith.subi %3267, %c1_i64_3397 : i64
    %3272 = arith.muli %c1_i64_3381, %3271 : i64
    %3273 = arith.subi %3270, %3272 : i64
    %3274 = arith.subi %3273, %c1_i64_3397 : i64
    %3275 = arith.floordivsi %3274, %c1_i64_3383 : i64
    %3276 = arith.addi %3275, %c1_i64_3397 : i64
    %3277 = arith.index_cast %3276 : i64 to index
    // Fill the accumulator by broadcasting the 256-element vector along dim 1 (#map3).
    %3278 = tensor.empty(%3266, %3277) : tensor<1x256x?x?xf32>
    %3279 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3352 : tensor<256xf32>) outs(%3278 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %3280 and %3281 are not referenced within this block.
    %3280 = arith.floordivsi %c256_3365, %3247 : index
    %3281 = arith.floordivsi %c256_3371, %3247 : index
    %c0_3399 = arith.constant 0 : index
    %c1_3400 = arith.constant 1 : index
    // Stride 1, dilation 1; %3279 is the init operand the convolution writes into.
    %3282 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3394, %cast_3341 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3279 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    %cast_3401 = tensor.cast %3282 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU over %cast_3401: an element is kept when it compares `ugt` 0.0
    // (greater than, or unordered) and is replaced by 0.0 otherwise.
    // The index constants below are not referenced within this block.
    %c1_3402 = arith.constant 1 : index
    %c1_3403 = arith.constant 1 : index
    %c256_3404 = arith.constant 256 : index
    %c2_3405 = arith.constant 2 : index
    %c14_3406 = arith.constant 14 : index
    %c3_3407 = arith.constant 3 : index
    %c14_3408 = arith.constant 14 : index
    %3283 = tensor.empty() : tensor<1x256x14x14xf32>
    %3284 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3401 : tensor<1x256x14x14xf32>) outs(%3283 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Cast between identical types.
    %cast_3409 = tensor.cast %3284 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the ReLU output %cast_3409 to i8 with scale 1.5625e-02 (2^-6)
    // and zero point 0: q = clamp(roundeven(x / scale) + zp, -128, 127).
    %3285 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3286 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): presumably the torch dtype code of the quantized result; not used in this block.
    %int12_3410 = torch.constant.int 12
    %3287 = torch.aten.item %3285 : !torch.vtensor<[],f32> -> !torch.float
    %3288 = torch_c.to_f64 %3287
    %3289 = torch.aten.item %3286 : !torch.vtensor<[],si8> -> !torch.int
    %3290 = torch_c.to_i64 %3289
    // The index constants below are not referenced within this block.
    %c1_3411 = arith.constant 1 : index
    %c1_3412 = arith.constant 1 : index
    %c256_3413 = arith.constant 256 : index
    %c2_3414 = arith.constant 2 : index
    %c14_3415 = arith.constant 14 : index
    %c3_3416 = arith.constant 3 : index
    %c14_3417 = arith.constant 14 : index
    %3291 = tensor.empty() : tensor<1x256x14x14xi8>
    %3292 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3409 : tensor<1x256x14x14xf32>) outs(%3291 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point (%5775) and scale (%5777) converted to f32 inside the region.
      %5774 = torch_c.to_i64 %3289
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3287
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even, the rounding mode ONNX QuantizeLinear specifies;
      // math.round would round ties away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Casts between identical types.
    %cast_3418 = tensor.cast %3292 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_3419 = tensor.cast %cast_3418 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_3419 (i8) back to f32 with scale 1.5625e-02 (= 1/64) and zero point 0:
    //   out = (in - zero_point) * scale
    %3293 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3294 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3295 = torch.aten.item %3293 : !torch.vtensor<[],f32> -> !torch.float
    %3296 = torch_c.to_f64 %3295
    %3297 = torch.aten.item %3294 : !torch.vtensor<[],si8> -> !torch.int
    %3298 = torch_c.to_i64 %3297
    // Same-type cast; no change to the value.
    %cast_3420 = tensor.cast %cast_3419 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3421 = arith.constant 1 : index
    %c1_3422 = arith.constant 1 : index
    %c256_3423 = arith.constant 256 : index
    %c2_3424 = arith.constant 2 : index
    %c14_3425 = arith.constant 14 : index
    %c3_3426 = arith.constant 3 : index
    %c14_3427 = arith.constant 14 : index
    %3299 = tensor.empty() : tensor<1x256x14x14xf32>
    %3300 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3420 : tensor<1x256x14x14xi8>) outs(%3299 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3297
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale narrowed from f64 to f32.
      %5779 = torch_c.to_f64 %3295
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_3428 = tensor.cast %3300 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 1024x256x1x1 f32 tensor %122 (defined before this chunk) to i8 with
    // scale 3.90625e-03 (= 1/256) and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3301 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3302 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3429 = torch.constant.int 12
    %3303 = torch.aten.item %3301 : !torch.vtensor<[],f32> -> !torch.float
    %3304 = torch_c.to_f64 %3303
    %3305 = torch.aten.item %3302 : !torch.vtensor<[],si8> -> !torch.int
    %3306 = torch_c.to_i64 %3305
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3430 = arith.constant 1 : index
    %c0_3431 = arith.constant 0 : index
    %c1024_3432 = arith.constant 1024 : index
    %c1_3433 = arith.constant 1 : index
    %c256_3434 = arith.constant 256 : index
    %3307 = tensor.empty() : tensor<1024x256x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0), i.e. the two trailing unit dims are pinned to 0.
    %3308 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%122 : tensor<1024x256x1x1xf32>) outs(%3307 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3305
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3303
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    // Both casts keep the type unchanged.
    %cast_3435 = tensor.cast %3308 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_3436 = tensor.cast %cast_3435 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize %cast_3436 (i8, 1024x256x1x1) back to f32 with scale 3.90625e-03 (= 1/256)
    // and zero point 0:  out = (in - zero_point) * scale
    %3309 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3310 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3311 = torch.aten.item %3309 : !torch.vtensor<[],f32> -> !torch.float
    %3312 = torch_c.to_f64 %3311
    %3313 = torch.aten.item %3310 : !torch.vtensor<[],si8> -> !torch.int
    %3314 = torch_c.to_i64 %3313
    // Same-type cast; no change to the value.
    %cast_3437 = tensor.cast %cast_3436 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3438 = arith.constant 1 : index
    %c0_3439 = arith.constant 0 : index
    %c1024_3440 = arith.constant 1024 : index
    %c1_3441 = arith.constant 1 : index
    %c256_3442 = arith.constant 256 : index
    %3315 = tensor.empty() : tensor<1024x256x1x1xf32>
    %3316 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3437 : tensor<1024x256x1x1xi8>) outs(%3315 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3313
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3311
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    // %cast_3443 is the filter operand of the linalg.conv_2d_nchw_fchw further down.
    %cast_3443 = tensor.cast %3316 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize the 1024-element f32 vector %124 (defined before this chunk) to i8 with
    // scale 7.8125e-03 (= 1/128) and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3317 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3318 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3444 = torch.constant.int 12
    %3319 = torch.aten.item %3317 : !torch.vtensor<[],f32> -> !torch.float
    %3320 = torch_c.to_f64 %3319
    %3321 = torch.aten.item %3318 : !torch.vtensor<[],si8> -> !torch.int
    %3322 = torch_c.to_i64 %3321
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3445 = arith.constant 1 : index
    %c0_3446 = arith.constant 0 : index
    %c1024_3447 = arith.constant 1024 : index
    %3323 = tensor.empty() : tensor<1024xi8>
    %3324 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%124 : tensor<1024xf32>) outs(%3323 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3321
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3319
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Both casts keep the type unchanged.
    %cast_3448 = tensor.cast %3324 : tensor<1024xi8> to tensor<1024xi8>
    %cast_3449 = tensor.cast %cast_3448 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize %cast_3449 (i8, 1024 elements) back to f32 with scale 7.8125e-03 (= 1/128)
    // and zero point 0:  out = (in - zero_point) * scale
    %3325 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3326 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3327 = torch.aten.item %3325 : !torch.vtensor<[],f32> -> !torch.float
    %3328 = torch_c.to_f64 %3327
    %3329 = torch.aten.item %3326 : !torch.vtensor<[],si8> -> !torch.int
    %3330 = torch_c.to_i64 %3329
    // Same-type cast; no change to the value.
    %cast_3450 = tensor.cast %cast_3449 : tensor<1024xi8> to tensor<1024xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3451 = arith.constant 1 : index
    %c0_3452 = arith.constant 0 : index
    %c1024_3453 = arith.constant 1024 : index
    %3331 = tensor.empty() : tensor<1024xf32>
    %3332 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3450 : tensor<1024xi8>) outs(%3331 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3329
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3327
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    // %cast_3454 is broadcast along the channel dim to initialise the convolution output below.
    %cast_3454 = tensor.cast %3332 : tensor<1024xf32> to tensor<1024xf32>
    // 2-D convolution (NCHW input, FCHW filter): input %cast_3428 (1x256x14x14), filter
    // %cast_3443 (1024x256x1x1), accumulator pre-filled with the per-channel vector
    // %cast_3454. Result is cast to the static shape 1x1024x14x14.
    %int0_3455 = torch.constant.int 0
    %int0_3456 = torch.constant.int 0
    %int1_3457 = torch.constant.int 1
    %int1_3458 = torch.constant.int 1
    %int1_3459 = torch.constant.int 1
    %int1_3460 = torch.constant.int 1
    %int0_3461 = torch.constant.int 0
    // Integer lists [0,0], [1,1], [1,1], [0,0]; no op in the visible chunk consumes them.
    // NOTE(review): presumably the padding/stride/dilation/output-padding attributes of the
    // original torch convolution -- confirm against the producer.
    %3333 = torch.prim.ListConstruct %int0_3455, %int0_3456 : (!torch.int, !torch.int) -> !torch.list<int>
    %3334 = torch.prim.ListConstruct %int1_3457, %int1_3458 : (!torch.int, !torch.int) -> !torch.list<int>
    %3335 = torch.prim.ListConstruct %int1_3459, %int1_3460 : (!torch.int, !torch.int) -> !torch.list<int>
    %3336 = torch.prim.ListConstruct %int0_3461, %int0_3461 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3462 = torch.constant.bool false
    %int1_3463 = torch.constant.int 1
    // %3337 (= 1) is the value checked by the "groups" assertions below; %3338/%3339 (= 0)
    // are the pad amounts applied to the last two dims by tensor.pad.
    %3337 = torch_c.to_i64 %int1_3463
    %3338 = torch_c.to_i64 %int0_3455
    %3339 = torch_c.to_i64 %int0_3456
    %3340 = torch_c.to_i64 %int0_3461
    %3341 = torch_c.to_i64 %int0_3461
    // Dimension indices and sizes of the input (1,256,14,14) and filter (1024,256,1,1).
    %c0_3464 = arith.constant 0 : index
    %c1_3465 = arith.constant 1 : index
    %c1_3466 = arith.constant 1 : index
    %c256_3467 = arith.constant 256 : index
    %c2_3468 = arith.constant 2 : index
    %c14_3469 = arith.constant 14 : index
    %c3_3470 = arith.constant 3 : index
    %c14_3471 = arith.constant 14 : index
    %c0_3472 = arith.constant 0 : index
    %c1024_3473 = arith.constant 1024 : index
    %c1_3474 = arith.constant 1 : index
    %c256_3475 = arith.constant 256 : index
    %c2_3476 = arith.constant 2 : index
    %c1_3477 = arith.constant 1 : index
    %c3_3478 = arith.constant 3 : index
    %c1_3479 = arith.constant 1 : index
    // Runtime checks that 256 and 1024 are both divisible by the group count %3342 (= 1).
    %3342 = arith.index_cast %3337 : i64 to index
    %c0_3480 = arith.constant 0 : index
    %3343 = arith.remsi %c256_3467, %3342 : index
    %3344 = arith.cmpi eq, %c0_3480, %3343 : index
    cf.assert %3344, "invalid: groups must divide input channel size evenly."
    %c0_3481 = arith.constant 0 : index
    %3345 = arith.remsi %c1024_3473, %3342 : index
    %3346 = arith.cmpi eq, %c0_3481, %3345 : index
    cf.assert %3346, "invalid: groups must divide weight batch size evenly."
    // Unit i64 constants used as multiplier (%..3482/%..3483) and divisor (%..3484/%..3485)
    // in the output-size arithmetic below.
    %c1_i64_3482 = arith.constant 1 : i64
    %c1_i64_3483 = arith.constant 1 : i64
    %c1_i64_3484 = arith.constant 1 : i64
    %c1_i64_3485 = arith.constant 1 : i64
    // Fill value for tensor.pad.
    %cst_3486 = arith.constant 0.000000e+00 : f32
    %c0_3487 = arith.constant 0 : index
    %c1_3488 = arith.constant 1 : index
    %c1_3489 = arith.constant 1 : index
    %c256_3490 = arith.constant 256 : index
    %c2_3491 = arith.constant 2 : index
    %c14_3492 = arith.constant 14 : index
    %c3_3493 = arith.constant 3 : index
    %c14_3494 = arith.constant 14 : index
    %c0_i64_3495 = arith.constant 0 : i64
    %3347 = arith.index_cast %c0_i64_3495 : i64 to index
    %3348 = arith.index_cast %c0_i64_3495 : i64 to index
    %3349 = arith.index_cast %3338 : i64 to index
    %3350 = arith.index_cast %3339 : i64 to index
    // All low/high pad amounts are 0 here, so no elements are added; the op only makes
    // the result type fully dynamic.
    %padded_3496 = tensor.pad %cast_3428 low[%3347, %3348, %3349, %3350] high[%3347, %3348, %3349, %3350] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3486 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent along dim 2:
    //   (14 + 2*pad - 1*(kernel - 1) - 1) floordiv 1 + 1, with pad = 0 and kernel = 1 -> 14.
    %3351 = arith.index_cast %c1_3477 : index to i64
    %c1_i64_3497 = arith.constant 1 : i64
    %c2_i64_3498 = arith.constant 2 : i64
    %3352 = arith.muli %3338, %c2_i64_3498 : i64
    %3353 = arith.index_cast %c14_3469 : index to i64
    %3354 = arith.addi %3353, %3352 : i64
    %3355 = arith.subi %3351, %c1_i64_3497 : i64
    %3356 = arith.muli %c1_i64_3482, %3355 : i64
    %3357 = arith.subi %3354, %3356 : i64
    %3358 = arith.subi %3357, %c1_i64_3497 : i64
    %3359 = arith.floordivsi %3358, %c1_i64_3484 : i64
    %3360 = arith.addi %3359, %c1_i64_3497 : i64
    %3361 = arith.index_cast %3360 : i64 to index
    // Same computation for dim 3 (also 14).
    %3362 = arith.index_cast %c1_3479 : index to i64
    %c1_i64_3499 = arith.constant 1 : i64
    %c2_i64_3500 = arith.constant 2 : i64
    %3363 = arith.muli %3339, %c2_i64_3500 : i64
    %3364 = arith.index_cast %c14_3471 : index to i64
    %3365 = arith.addi %3364, %3363 : i64
    %3366 = arith.subi %3362, %c1_i64_3499 : i64
    %3367 = arith.muli %c1_i64_3483, %3366 : i64
    %3368 = arith.subi %3365, %3367 : i64
    %3369 = arith.subi %3368, %c1_i64_3499 : i64
    %3370 = arith.floordivsi %3369, %c1_i64_3485 : i64
    %3371 = arith.addi %3370, %c1_i64_3499 : i64
    %3372 = arith.index_cast %3371 : i64 to index
    // Broadcast the 1024-element vector along dim 1 (#map3 reads only d1) so the
    // convolution accumulates on top of it.
    %3373 = tensor.empty(%3361, %3372) : tensor<1x1024x?x?xf32>
    %3374 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3454 : tensor<1024xf32>) outs(%3373 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // %3375, %3376, %c0_3501 and %c1_3502 are not referenced in the visible chunk.
    %3375 = arith.floordivsi %c256_3467, %3342 : index
    %3376 = arith.floordivsi %c1024_3473, %3342 : index
    %c0_3501 = arith.constant 0 : index
    %c1_3502 = arith.constant 1 : index
    %3377 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3496, %cast_3443 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3374 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Recover the static spatial extents.
    %cast_3503 = tensor.cast %3377 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize the convolution result %cast_3503 (f32, 1x1024x14x14) to i8 with
    // scale 7.8125e-03 (= 1/128) and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3378 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3379 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3504 = torch.constant.int 12
    %3380 = torch.aten.item %3378 : !torch.vtensor<[],f32> -> !torch.float
    %3381 = torch_c.to_f64 %3380
    %3382 = torch.aten.item %3379 : !torch.vtensor<[],si8> -> !torch.int
    %3383 = torch_c.to_i64 %3382
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3505 = arith.constant 1 : index
    %c1_3506 = arith.constant 1 : index
    %c1024_3507 = arith.constant 1024 : index
    %c2_3508 = arith.constant 2 : index
    %c14_3509 = arith.constant 14 : index
    %c3_3510 = arith.constant 3 : index
    %c14_3511 = arith.constant 14 : index
    %3384 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3385 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3503 : tensor<1x1024x14x14xf32>) outs(%3384 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3382
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3380
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Both casts keep the type unchanged.
    %cast_3512 = tensor.cast %3385 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3513 = tensor.cast %cast_3512 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_3513 (i8, 1x1024x14x14) back to f32 with scale 7.8125e-03 (= 1/128)
    // and zero point 0:  out = (in - zero_point) * scale
    %3386 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3387 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3388 = torch.aten.item %3386 : !torch.vtensor<[],f32> -> !torch.float
    %3389 = torch_c.to_f64 %3388
    %3390 = torch.aten.item %3387 : !torch.vtensor<[],si8> -> !torch.int
    %3391 = torch_c.to_i64 %3390
    // Same-type cast; no change to the value.
    %cast_3514 = tensor.cast %cast_3513 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3515 = arith.constant 1 : index
    %c1_3516 = arith.constant 1 : index
    %c1024_3517 = arith.constant 1024 : index
    %c2_3518 = arith.constant 2 : index
    %c14_3519 = arith.constant 14 : index
    %c3_3520 = arith.constant 3 : index
    %c14_3521 = arith.constant 14 : index
    %3392 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3393 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3514 : tensor<1x1024x14x14xi8>) outs(%3392 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3390
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3388
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3522 = tensor.cast %3393 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Element-wise add: out = %cast_3522 + %cast_3216 * alpha, with alpha = 1 (%3394).
    // %cast_3216 is defined before this chunk.
    %int1_3523 = torch.constant.int 1
    %3394 = torch_c.to_i64 %int1_3523
    %c1_3524 = arith.constant 1 : index
    %c1_3525 = arith.constant 1 : index
    %c1024_3526 = arith.constant 1024 : index
    %c2_3527 = arith.constant 2 : index
    %c14_3528 = arith.constant 14 : index
    %c3_3529 = arith.constant 3 : index
    %c14_3530 = arith.constant 14 : index
    %c1_3531 = arith.constant 1 : index
    %c1024_3532 = arith.constant 1024 : index
    // Shape-compatibility checks for dims 1, 2 and 3; each compares two equal constants
    // (1024 == 1024, 14 == 14, 14 == 14).
    %3395 = arith.cmpi eq, %c1024_3526, %c1024_3532 : index
    cf.assert %3395, "mismatched size for broadcast"
    %c2_3533 = arith.constant 2 : index
    %c14_3534 = arith.constant 14 : index
    %3396 = arith.cmpi eq, %c14_3528, %c14_3534 : index
    cf.assert %3396, "mismatched size for broadcast"
    %c3_3535 = arith.constant 3 : index
    %c14_3536 = arith.constant 14 : index
    %3397 = arith.cmpi eq, %c14_3530, %c14_3536 : index
    cf.assert %3397, "mismatched size for broadcast"
    %3398 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3399 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3522, %cast_3216 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%3398 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // alpha is converted to f32 and applied to the second operand before the add.
      %5774 = arith.sitofp %3394 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3537 = tensor.cast %3399 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Element-wise ReLU: yields max(in, 0.0) for every element of %cast_3537
    // into a fresh 1x1024x14x14 f32 tensor.
    // The index constants below are not referenced in the visible chunk.
    %c1_3538 = arith.constant 1 : index
    %c1_3539 = arith.constant 1 : index
    %c1024_3540 = arith.constant 1024 : index
    %c2_3541 = arith.constant 2 : index
    %c14_3542 = arith.constant 14 : index
    %c3_3543 = arith.constant 3 : index
    %c14_3544 = arith.constant 14 : index
    %3400 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3401 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3537 : tensor<1x1024x14x14xf32>) outs(%3400 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate, so a NaN input compares true and is
      // forwarded unchanged by the select below.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3545 = tensor.cast %3401 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize %cast_3545 (f32, 1x1024x14x14) to i8 with scale 1.5625e-02 (= 1/64)
    // and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3402 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3403 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3546 = torch.constant.int 12
    %3404 = torch.aten.item %3402 : !torch.vtensor<[],f32> -> !torch.float
    %3405 = torch_c.to_f64 %3404
    %3406 = torch.aten.item %3403 : !torch.vtensor<[],si8> -> !torch.int
    %3407 = torch_c.to_i64 %3406
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3547 = arith.constant 1 : index
    %c1_3548 = arith.constant 1 : index
    %c1024_3549 = arith.constant 1024 : index
    %c2_3550 = arith.constant 2 : index
    %c14_3551 = arith.constant 14 : index
    %c3_3552 = arith.constant 3 : index
    %c14_3553 = arith.constant 14 : index
    %3408 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3409 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3545 : tensor<1x1024x14x14xf32>) outs(%3408 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3406
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3404
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Both casts keep the type unchanged.
    %cast_3554 = tensor.cast %3409 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3555 = tensor.cast %cast_3554 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_3555 (i8, 1x1024x14x14) back to f32 with scale 1.5625e-02 (= 1/64)
    // and zero point 0:  out = (in - zero_point) * scale
    %3410 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3411 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3412 = torch.aten.item %3410 : !torch.vtensor<[],f32> -> !torch.float
    %3413 = torch_c.to_f64 %3412
    %3414 = torch.aten.item %3411 : !torch.vtensor<[],si8> -> !torch.int
    %3415 = torch_c.to_i64 %3414
    // Same-type cast; no change to the value.
    %cast_3556 = tensor.cast %cast_3555 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3557 = arith.constant 1 : index
    %c1_3558 = arith.constant 1 : index
    %c1024_3559 = arith.constant 1024 : index
    %c2_3560 = arith.constant 2 : index
    %c14_3561 = arith.constant 14 : index
    %c3_3562 = arith.constant 3 : index
    %c14_3563 = arith.constant 14 : index
    %3416 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3417 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3556 : tensor<1x1024x14x14xi8>) outs(%3416 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3414
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3412
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3564 = tensor.cast %3417 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 256x1024x1x1 f32 tensor %126 (defined before this chunk) to i8 with
    // scale 0.001953125 (= 1/512) and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3418 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3419 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3565 = torch.constant.int 12
    %3420 = torch.aten.item %3418 : !torch.vtensor<[],f32> -> !torch.float
    %3421 = torch_c.to_f64 %3420
    %3422 = torch.aten.item %3419 : !torch.vtensor<[],si8> -> !torch.int
    %3423 = torch_c.to_i64 %3422
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3566 = arith.constant 1 : index
    %c0_3567 = arith.constant 0 : index
    %c256_3568 = arith.constant 256 : index
    %c1_3569 = arith.constant 1 : index
    %c1024_3570 = arith.constant 1024 : index
    %3424 = tensor.empty() : tensor<256x1024x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0), i.e. the two trailing unit dims are pinned to 0.
    %3425 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%126 : tensor<256x1024x1x1xf32>) outs(%3424 : tensor<256x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3422
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3420
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x1024x1x1xi8>
    // Both casts keep the type unchanged.
    %cast_3571 = tensor.cast %3425 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %cast_3572 = tensor.cast %cast_3571 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Dequantize %cast_3572 (i8, 256x1024x1x1) back to f32 with scale 0.001953125 (= 1/512)
    // and zero point 0:  out = (in - zero_point) * scale
    %3426 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3427 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3428 = torch.aten.item %3426 : !torch.vtensor<[],f32> -> !torch.float
    %3429 = torch_c.to_f64 %3428
    %3430 = torch.aten.item %3427 : !torch.vtensor<[],si8> -> !torch.int
    %3431 = torch_c.to_i64 %3430
    // Same-type cast; no change to the value.
    %cast_3573 = tensor.cast %cast_3572 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3574 = arith.constant 1 : index
    %c0_3575 = arith.constant 0 : index
    %c256_3576 = arith.constant 256 : index
    %c1_3577 = arith.constant 1 : index
    %c1024_3578 = arith.constant 1024 : index
    %3432 = tensor.empty() : tensor<256x1024x1x1xf32>
    %3433 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3573 : tensor<256x1024x1x1xi8>) outs(%3432 : tensor<256x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3430
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3428
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x1024x1x1xf32>
    // NOTE(review): %cast_3579 has no use in the visible chunk; presumably the filter of
    // the convolution that follows -- confirm past the end of this range.
    %cast_3579 = tensor.cast %3433 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
    // Quantize the 256-element f32 vector %128 (defined before this chunk) to i8 with
    // scale 3.90625e-03 (= 1/256) and zero point 0:
    //   q = clamp(round(in / scale) + zero_point, -128, 127)
    %3434 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3435 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the visible chunk.
    %int12_3580 = torch.constant.int 12
    %3436 = torch.aten.item %3434 : !torch.vtensor<[],f32> -> !torch.float
    %3437 = torch_c.to_f64 %3436
    %3438 = torch.aten.item %3435 : !torch.vtensor<[],si8> -> !torch.int
    %3439 = torch_c.to_i64 %3438
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3581 = arith.constant 1 : index
    %c0_3582 = arith.constant 0 : index
    %c256_3583 = arith.constant 256 : index
    %3440 = tensor.empty() : tensor<256xi8>
    %3441 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%128 : tensor<256xf32>) outs(%3440 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed to f32.
      %5774 = torch_c.to_i64 %3438
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3436
      %5777 = arith.truncf %5776 : f64 to f32
      // Scale, round (math.round resolves ties away from zero), then shift by the zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Both casts keep the type unchanged.
    %cast_3584 = tensor.cast %3441 : tensor<256xi8> to tensor<256xi8>
    %cast_3585 = tensor.cast %cast_3584 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_3585 (i8, 256 elements) back to f32 with scale 3.90625e-03 (= 1/256)
    // and zero point 0:  out = (in - zero_point) * scale
    %3442 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3443 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3444 = torch.aten.item %3442 : !torch.vtensor<[],f32> -> !torch.float
    %3445 = torch_c.to_f64 %3444
    %3446 = torch.aten.item %3443 : !torch.vtensor<[],si8> -> !torch.int
    %3447 = torch_c.to_i64 %3446
    // Same-type cast; no change to the value.
    %cast_3586 = tensor.cast %cast_3585 : tensor<256xi8> to tensor<256xi8>
    // Index constants emitted by the lowering; none is referenced in the visible chunk.
    %c1_3587 = arith.constant 1 : index
    %c0_3588 = arith.constant 0 : index
    %c256_3589 = arith.constant 256 : index
    %3448 = tensor.empty() : tensor<256xf32>
    %3449 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3586 : tensor<256xi8>) outs(%3448 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by the scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3446
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3444
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    // %cast_3590 is broadcast along the channel dim further down.
    %cast_3590 = tensor.cast %3449 : tensor<256xf32> to tensor<256xf32>
    // Setup for an operation on %cast_3564 (1x1024x14x14): group-divisibility checks,
    // zero-width padding, output-size arithmetic, and a 1x256x?x? tensor pre-filled with
    // the per-channel vector %cast_3590. The op that consumes %padded_3632 and %3491 lies
    // past the end of this chunk.
    // NOTE(review): presumably a linalg.conv_2d_nchw_fchw with filter %cast_3579 -- confirm.
    %int0_3591 = torch.constant.int 0
    %int0_3592 = torch.constant.int 0
    %int1_3593 = torch.constant.int 1
    %int1_3594 = torch.constant.int 1
    %int1_3595 = torch.constant.int 1
    %int1_3596 = torch.constant.int 1
    %int0_3597 = torch.constant.int 0
    // Integer lists [0,0], [1,1], [1,1], [0,0]; no op in the visible chunk consumes them.
    %3450 = torch.prim.ListConstruct %int0_3591, %int0_3592 : (!torch.int, !torch.int) -> !torch.list<int>
    %3451 = torch.prim.ListConstruct %int1_3593, %int1_3594 : (!torch.int, !torch.int) -> !torch.list<int>
    %3452 = torch.prim.ListConstruct %int1_3595, %int1_3596 : (!torch.int, !torch.int) -> !torch.list<int>
    %3453 = torch.prim.ListConstruct %int0_3597, %int0_3597 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3598 = torch.constant.bool false
    %int1_3599 = torch.constant.int 1
    // %3454 (= 1) is the value checked by the "groups" assertions below; %3455/%3456 (= 0)
    // are the pad amounts applied to the last two dims by tensor.pad.
    %3454 = torch_c.to_i64 %int1_3599
    %3455 = torch_c.to_i64 %int0_3591
    %3456 = torch_c.to_i64 %int0_3592
    %3457 = torch_c.to_i64 %int0_3597
    %3458 = torch_c.to_i64 %int0_3597
    // Dimension indices and sizes: (1,1024,14,14) and (256,1024,1,1).
    %c0_3600 = arith.constant 0 : index
    %c1_3601 = arith.constant 1 : index
    %c1_3602 = arith.constant 1 : index
    %c1024_3603 = arith.constant 1024 : index
    %c2_3604 = arith.constant 2 : index
    %c14_3605 = arith.constant 14 : index
    %c3_3606 = arith.constant 3 : index
    %c14_3607 = arith.constant 14 : index
    %c0_3608 = arith.constant 0 : index
    %c256_3609 = arith.constant 256 : index
    %c1_3610 = arith.constant 1 : index
    %c1024_3611 = arith.constant 1024 : index
    %c2_3612 = arith.constant 2 : index
    %c1_3613 = arith.constant 1 : index
    %c3_3614 = arith.constant 3 : index
    %c1_3615 = arith.constant 1 : index
    // Runtime checks that 1024 and 256 are both divisible by the group count %3459 (= 1).
    %3459 = arith.index_cast %3454 : i64 to index
    %c0_3616 = arith.constant 0 : index
    %3460 = arith.remsi %c1024_3603, %3459 : index
    %3461 = arith.cmpi eq, %c0_3616, %3460 : index
    cf.assert %3461, "invalid: groups must divide input channel size evenly."
    %c0_3617 = arith.constant 0 : index
    %3462 = arith.remsi %c256_3609, %3459 : index
    %3463 = arith.cmpi eq, %c0_3617, %3462 : index
    cf.assert %3463, "invalid: groups must divide weight batch size evenly."
    // Unit i64 constants used as multiplier (%..3618/%..3619) and divisor (%..3620/%..3621)
    // in the output-size arithmetic below.
    %c1_i64_3618 = arith.constant 1 : i64
    %c1_i64_3619 = arith.constant 1 : i64
    %c1_i64_3620 = arith.constant 1 : i64
    %c1_i64_3621 = arith.constant 1 : i64
    // Fill value for tensor.pad.
    %cst_3622 = arith.constant 0.000000e+00 : f32
    %c0_3623 = arith.constant 0 : index
    %c1_3624 = arith.constant 1 : index
    %c1_3625 = arith.constant 1 : index
    %c1024_3626 = arith.constant 1024 : index
    %c2_3627 = arith.constant 2 : index
    %c14_3628 = arith.constant 14 : index
    %c3_3629 = arith.constant 3 : index
    %c14_3630 = arith.constant 14 : index
    %c0_i64_3631 = arith.constant 0 : i64
    %3464 = arith.index_cast %c0_i64_3631 : i64 to index
    %3465 = arith.index_cast %c0_i64_3631 : i64 to index
    %3466 = arith.index_cast %3455 : i64 to index
    %3467 = arith.index_cast %3456 : i64 to index
    // All low/high pad amounts are 0 here, so no elements are added; the op only makes
    // the result type fully dynamic.
    %padded_3632 = tensor.pad %cast_3564 low[%3464, %3465, %3466, %3467] high[%3464, %3465, %3466, %3467] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3622 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent along dim 2:
    //   (14 + 2*pad - 1*(kernel - 1) - 1) floordiv 1 + 1, with pad = 0 and kernel = 1 -> 14.
    %3468 = arith.index_cast %c1_3613 : index to i64
    %c1_i64_3633 = arith.constant 1 : i64
    %c2_i64_3634 = arith.constant 2 : i64
    %3469 = arith.muli %3455, %c2_i64_3634 : i64
    %3470 = arith.index_cast %c14_3605 : index to i64
    %3471 = arith.addi %3470, %3469 : i64
    %3472 = arith.subi %3468, %c1_i64_3633 : i64
    %3473 = arith.muli %c1_i64_3618, %3472 : i64
    %3474 = arith.subi %3471, %3473 : i64
    %3475 = arith.subi %3474, %c1_i64_3633 : i64
    %3476 = arith.floordivsi %3475, %c1_i64_3620 : i64
    %3477 = arith.addi %3476, %c1_i64_3633 : i64
    %3478 = arith.index_cast %3477 : i64 to index
    // Same computation for dim 3 (also 14).
    %3479 = arith.index_cast %c1_3615 : index to i64
    %c1_i64_3635 = arith.constant 1 : i64
    %c2_i64_3636 = arith.constant 2 : i64
    %3480 = arith.muli %3456, %c2_i64_3636 : i64
    %3481 = arith.index_cast %c14_3607 : index to i64
    %3482 = arith.addi %3481, %3480 : i64
    %3483 = arith.subi %3479, %c1_i64_3635 : i64
    %3484 = arith.muli %c1_i64_3619, %3483 : i64
    %3485 = arith.subi %3482, %3484 : i64
    %3486 = arith.subi %3485, %c1_i64_3635 : i64
    %3487 = arith.floordivsi %3486, %c1_i64_3621 : i64
    %3488 = arith.addi %3487, %c1_i64_3635 : i64
    %3489 = arith.index_cast %3488 : i64 to index
    // Broadcast the 256-element vector along dim 1 (#map3 reads only d1) into a
    // 1x256x?x? tensor.
    %3490 = tensor.empty(%3478, %3489) : tensor<1x256x?x?xf32>
    %3491 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3590 : tensor<256xf32>) outs(%3490 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    %3492 = arith.floordivsi %c1024_3603, %3459 : index
    %3493 = arith.floordivsi %c256_3609, %3459 : index
    %c0_3637 = arith.constant 0 : index
    %c1_3638 = arith.constant 1 : index
    %3494 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3632, %cast_3579 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3491 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    %cast_3639 = tensor.cast %3494 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU-style elementwise clamp of the convolution result %cast_3639.
    // The index constants below are not referenced anywhere in this block.
    %c1_3640 = arith.constant 1 : index
    %c1_3641 = arith.constant 1 : index
    %c256_3642 = arith.constant 256 : index
    %c2_3643 = arith.constant 2 : index
    %c14_3644 = arith.constant 14 : index
    %c3_3645 = arith.constant 3 : index
    %c14_3646 = arith.constant 14 : index
    // Destination tensor; the all-parallel generic below writes every element.
    %3495 = tensor.empty() : tensor<1x256x14x14xf32>
    // #map reads the input with its leading dimension pinned to index 0; #map1 is the identity on the output.
    %3496 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3639 : tensor<1x256x14x14xf32>) outs(%3495 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      // Yields %in when it compares greater than 0.0, otherwise 0.0.
      // `ugt` is an unordered predicate, so a NaN input also selects %in.
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Same-type cast: source and result types are identical.
    %cast_3647 = tensor.cast %3496 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 activation %cast_3647 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (1/128) and zero_point = 0.
    %3497 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3498 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3648 = torch.constant.int 12
    // Scalar scale / zero point. %3500 and %3502 are not used below; the region re-materializes them from %3499 / %3501.
    %3499 = torch.aten.item %3497 : !torch.vtensor<[],f32> -> !torch.float
    %3500 = torch_c.to_f64 %3499
    %3501 = torch.aten.item %3498 : !torch.vtensor<[],si8> -> !torch.int
    %3502 = torch_c.to_i64 %3501
    // The index constants below are not referenced anywhere in this block.
    %c1_3649 = arith.constant 1 : index
    %c1_3650 = arith.constant 1 : index
    %c256_3651 = arith.constant 256 : index
    %c2_3652 = arith.constant 2 : index
    %c14_3653 = arith.constant 14 : index
    %c3_3654 = arith.constant 3 : index
    %c14_3655 = arith.constant 14 : index
    %3503 = tensor.empty() : tensor<1x256x14x14xi8>
    %3504 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3647 : tensor<1x256x14x14xf32>) outs(%3503 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3501
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3499
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Same-type casts: source and result types are identical.
    %cast_3656 = tensor.cast %3504 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_3657 = tensor.cast %cast_3656 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize the i8 activation %cast_3657 back to f32:
    //   x = (q - zero_point) * scale, with scale = 7.8125e-03 (1/128) and zero_point = 0.
    %3505 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3506 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3508 and %3510 are not used below; the region re-materializes them from %3507 / %3509.
    %3507 = torch.aten.item %3505 : !torch.vtensor<[],f32> -> !torch.float
    %3508 = torch_c.to_f64 %3507
    %3509 = torch.aten.item %3506 : !torch.vtensor<[],si8> -> !torch.int
    %3510 = torch_c.to_i64 %3509
    // Same-type cast: source and result types are identical.
    %cast_3658 = tensor.cast %cast_3657 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3659 = arith.constant 1 : index
    %c1_3660 = arith.constant 1 : index
    %c256_3661 = arith.constant 256 : index
    %c2_3662 = arith.constant 2 : index
    %c14_3663 = arith.constant 14 : index
    %c3_3664 = arith.constant 3 : index
    %c14_3665 = arith.constant 14 : index
    %3511 = tensor.empty() : tensor<1x256x14x14xf32>
    %3512 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3658 : tensor<1x256x14x14xi8>) outs(%3511 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3509
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3507
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    // Same-type cast: source and result types are identical.
    %cast_3666 = tensor.cast %3512 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 tensor %130 (256x256x3x3) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 0.001953125 (1/512) and zero_point = 0.
    // NOTE(review): %130 is defined outside this block; presumably a convolution weight, confirm against its use.
    %3513 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3514 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3667 = torch.constant.int 12
    // Scalar scale / zero point. %3516 and %3518 are not used below; the region re-materializes them from %3515 / %3517.
    %3515 = torch.aten.item %3513 : !torch.vtensor<[],f32> -> !torch.float
    %3516 = torch_c.to_f64 %3515
    %3517 = torch.aten.item %3514 : !torch.vtensor<[],si8> -> !torch.int
    %3518 = torch_c.to_i64 %3517
    // The index constants below are not referenced anywhere in this block.
    %c1_3668 = arith.constant 1 : index
    %c0_3669 = arith.constant 0 : index
    %c256_3670 = arith.constant 256 : index
    %c1_3671 = arith.constant 1 : index
    %c256_3672 = arith.constant 256 : index
    %c2_3673 = arith.constant 2 : index
    %c3_3674 = arith.constant 3 : index
    %c3_3675 = arith.constant 3 : index
    %c3_3676 = arith.constant 3 : index
    %3519 = tensor.empty() : tensor<256x256x3x3xi8>
    %3520 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%130 : tensor<256x256x3x3xf32>) outs(%3519 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3517
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3515
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Same-type casts: source and result types are identical.
    %cast_3677 = tensor.cast %3520 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_3678 = tensor.cast %cast_3677 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize the i8 tensor %cast_3678 (256x256x3x3) back to f32:
    //   x = (q - zero_point) * scale, with scale = 0.001953125 (1/512) and zero_point = 0.
    %3521 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3522 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3524 and %3526 are not used below; the region re-materializes them from %3523 / %3525.
    %3523 = torch.aten.item %3521 : !torch.vtensor<[],f32> -> !torch.float
    %3524 = torch_c.to_f64 %3523
    %3525 = torch.aten.item %3522 : !torch.vtensor<[],si8> -> !torch.int
    %3526 = torch_c.to_i64 %3525
    // Same-type cast: source and result types are identical.
    %cast_3679 = tensor.cast %cast_3678 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3680 = arith.constant 1 : index
    %c0_3681 = arith.constant 0 : index
    %c256_3682 = arith.constant 256 : index
    %c1_3683 = arith.constant 1 : index
    %c256_3684 = arith.constant 256 : index
    %c2_3685 = arith.constant 2 : index
    %c3_3686 = arith.constant 3 : index
    %c3_3687 = arith.constant 3 : index
    %c3_3688 = arith.constant 3 : index
    %3527 = tensor.empty() : tensor<256x256x3x3xf32>
    %3528 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3679 : tensor<256x256x3x3xi8>) outs(%3527 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3525
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3523
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    // Same-type cast: source and result types are identical.
    %cast_3689 = tensor.cast %3528 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize the 256-element f32 vector %132 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (1/128) and zero_point = 0.
    // NOTE(review): %132 is defined outside this block; presumably a convolution bias, confirm against its use.
    %3529 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3530 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3690 = torch.constant.int 12
    // Scalar scale / zero point. %3532 and %3534 are not used below; the region re-materializes them from %3531 / %3533.
    %3531 = torch.aten.item %3529 : !torch.vtensor<[],f32> -> !torch.float
    %3532 = torch_c.to_f64 %3531
    %3533 = torch.aten.item %3530 : !torch.vtensor<[],si8> -> !torch.int
    %3534 = torch_c.to_i64 %3533
    // The index constants below are not referenced anywhere in this block.
    %c1_3691 = arith.constant 1 : index
    %c0_3692 = arith.constant 0 : index
    %c256_3693 = arith.constant 256 : index
    %3535 = tensor.empty() : tensor<256xi8>
    %3536 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%132 : tensor<256xf32>) outs(%3535 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3533
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3531
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts: source and result types are identical.
    %cast_3694 = tensor.cast %3536 : tensor<256xi8> to tensor<256xi8>
    %cast_3695 = tensor.cast %cast_3694 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the 256-element i8 vector %cast_3695 back to f32:
    //   x = (q - zero_point) * scale, with scale = 7.8125e-03 (1/128) and zero_point = 0.
    %3537 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3538 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3540 and %3542 are not used below; the region re-materializes them from %3539 / %3541.
    %3539 = torch.aten.item %3537 : !torch.vtensor<[],f32> -> !torch.float
    %3540 = torch_c.to_f64 %3539
    %3541 = torch.aten.item %3538 : !torch.vtensor<[],si8> -> !torch.int
    %3542 = torch_c.to_i64 %3541
    // Same-type cast: source and result types are identical.
    %cast_3696 = tensor.cast %cast_3695 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3697 = arith.constant 1 : index
    %c0_3698 = arith.constant 0 : index
    %c256_3699 = arith.constant 256 : index
    %3543 = tensor.empty() : tensor<256xf32>
    %3544 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3696 : tensor<256xi8>) outs(%3543 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3541
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3539
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    // Same-type cast: source and result types are identical.
    %cast_3700 = tensor.cast %3544 : tensor<256xf32> to tensor<256xf32>
    // Lowered 2-D convolution: input %cast_3666 (1x256x14x14), filter %cast_3689 (256x256x3x3),
    // bias %cast_3700 (256). Produces %cast_3749 (1x256x14x14).
    //
    // Scalar operands of the original torch-level op. %int1_3701 / %int1_3702 feed the spatial
    // padding below and %int1_3709 is the group count checked by the asserts.
    // NOTE(review): the remaining constants and the four lists are not used in this block;
    // presumably they are the stride / dilation / output-padding / transposed operands.
    %int1_3701 = torch.constant.int 1
    %int1_3702 = torch.constant.int 1
    %int1_3703 = torch.constant.int 1
    %int1_3704 = torch.constant.int 1
    %int1_3705 = torch.constant.int 1
    %int1_3706 = torch.constant.int 1
    %int0_3707 = torch.constant.int 0
    %3545 = torch.prim.ListConstruct %int1_3701, %int1_3702 : (!torch.int, !torch.int) -> !torch.list<int>
    %3546 = torch.prim.ListConstruct %int1_3703, %int1_3704 : (!torch.int, !torch.int) -> !torch.list<int>
    %3547 = torch.prim.ListConstruct %int1_3705, %int1_3706 : (!torch.int, !torch.int) -> !torch.list<int>
    %3548 = torch.prim.ListConstruct %int0_3707, %int0_3707 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3708 = torch.constant.bool false
    %int1_3709 = torch.constant.int 1
    // %3549 = groups, %3550 / %3551 = padding for dims 2 / 3. %3552 and %3553 are not used in this block.
    %3549 = torch_c.to_i64 %int1_3709
    %3550 = torch_c.to_i64 %int1_3701
    %3551 = torch_c.to_i64 %int1_3702
    %3552 = torch_c.to_i64 %int0_3707
    %3553 = torch_c.to_i64 %int0_3707
    // Index constants; within this block only %c256_3713, %c14_3715, %c14_3717, %c256_3719,
    // %c3_3723 and %c3_3725 are referenced.
    %c0_3710 = arith.constant 0 : index
    %c1_3711 = arith.constant 1 : index
    %c1_3712 = arith.constant 1 : index
    %c256_3713 = arith.constant 256 : index
    %c2_3714 = arith.constant 2 : index
    %c14_3715 = arith.constant 14 : index
    %c3_3716 = arith.constant 3 : index
    %c14_3717 = arith.constant 14 : index
    %c0_3718 = arith.constant 0 : index
    %c256_3719 = arith.constant 256 : index
    %c1_3720 = arith.constant 1 : index
    %c256_3721 = arith.constant 256 : index
    %c2_3722 = arith.constant 2 : index
    %c3_3723 = arith.constant 3 : index
    %c3_3724 = arith.constant 3 : index
    %c3_3725 = arith.constant 3 : index
    // Runtime checks that the group count divides both channel counts (256 and 256).
    %3554 = arith.index_cast %3549 : i64 to index
    %c0_3726 = arith.constant 0 : index
    %3555 = arith.remsi %c256_3713, %3554 : index
    %3556 = arith.cmpi eq, %c0_3726, %3555 : index
    cf.assert %3556, "invalid: groups must divide input channel size evenly."
    %c0_3727 = arith.constant 0 : index
    %3557 = arith.remsi %c256_3719, %3554 : index
    %3558 = arith.cmpi eq, %c0_3727, %3557 : index
    cf.assert %3558, "invalid: groups must divide weight batch size evenly."
    // In the output-size arithmetic below, %c1_i64_3728 / %c1_i64_3729 multiply (kernel - 1)
    // (the dilation position) and %c1_i64_3730 / %c1_i64_3731 are the divisors (the stride position).
    %c1_i64_3728 = arith.constant 1 : i64
    %c1_i64_3729 = arith.constant 1 : i64
    %c1_i64_3730 = arith.constant 1 : i64
    %c1_i64_3731 = arith.constant 1 : i64
    // Padding fill value.
    %cst_3732 = arith.constant 0.000000e+00 : f32
    // The index constants below are not referenced anywhere in this block.
    %c0_3733 = arith.constant 0 : index
    %c1_3734 = arith.constant 1 : index
    %c1_3735 = arith.constant 1 : index
    %c256_3736 = arith.constant 256 : index
    %c2_3737 = arith.constant 2 : index
    %c14_3738 = arith.constant 14 : index
    %c3_3739 = arith.constant 3 : index
    %c14_3740 = arith.constant 14 : index
    %c0_i64_3741 = arith.constant 0 : i64
    // Pad amounts: 0 for dims 0 and 1, %3550 / %3551 for dims 2 and 3.
    %3559 = arith.index_cast %c0_i64_3741 : i64 to index
    %3560 = arith.index_cast %c0_i64_3741 : i64 to index
    %3561 = arith.index_cast %3550 : i64 to index
    %3562 = arith.index_cast %3551 : i64 to index
    // Zero-pad the input with the same amounts on the low and high side; the result type is fully dynamic.
    %padded_3742 = tensor.pad %cast_3666 low[%3559, %3560, %3561, %3562] high[%3559, %3560, %3561, %3562] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3732 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent of dim 2: floordiv(in + 2*pad - d*(k - 1) - 1, s) + 1
    // with in = 14, pad = %3550, k = 3, d = %c1_i64_3728, s = %c1_i64_3730.
    %3563 = arith.index_cast %c3_3723 : index to i64
    %c1_i64_3743 = arith.constant 1 : i64
    %c2_i64_3744 = arith.constant 2 : i64
    %3564 = arith.muli %3550, %c2_i64_3744 : i64
    %3565 = arith.index_cast %c14_3715 : index to i64
    %3566 = arith.addi %3565, %3564 : i64
    %3567 = arith.subi %3563, %c1_i64_3743 : i64
    %3568 = arith.muli %c1_i64_3728, %3567 : i64
    %3569 = arith.subi %3566, %3568 : i64
    %3570 = arith.subi %3569, %c1_i64_3743 : i64
    %3571 = arith.floordivsi %3570, %c1_i64_3730 : i64
    %3572 = arith.addi %3571, %c1_i64_3743 : i64
    %3573 = arith.index_cast %3572 : i64 to index
    // Output extent of dim 3: same formula with pad = %3551, d = %c1_i64_3729, s = %c1_i64_3731.
    %3574 = arith.index_cast %c3_3725 : index to i64
    %c1_i64_3745 = arith.constant 1 : i64
    %c2_i64_3746 = arith.constant 2 : i64
    %3575 = arith.muli %3551, %c2_i64_3746 : i64
    %3576 = arith.index_cast %c14_3717 : index to i64
    %3577 = arith.addi %3576, %3575 : i64
    %3578 = arith.subi %3574, %c1_i64_3745 : i64
    %3579 = arith.muli %c1_i64_3729, %3578 : i64
    %3580 = arith.subi %3577, %3579 : i64
    %3581 = arith.subi %3580, %c1_i64_3745 : i64
    %3582 = arith.floordivsi %3581, %c1_i64_3731 : i64
    %3583 = arith.addi %3582, %c1_i64_3745 : i64
    %3584 = arith.index_cast %3583 : i64 to index
    // Accumulator initialised with the bias: #map3 indexes the 256-element bias by the channel
    // dimension (d1) only, broadcasting it over the other three output dimensions.
    %3585 = tensor.empty(%3573, %3584) : tensor<1x256x?x?xf32>
    %3586 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3700 : tensor<256xf32>) outs(%3585 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // Channel counts divided by the group count; %3587, %3588 and the two index constants
    // after them are not used in this block.
    %3587 = arith.floordivsi %c256_3713, %3554 : index
    %3588 = arith.floordivsi %c256_3719, %3554 : index
    %c0_3747 = arith.constant 0 : index
    %c1_3748 = arith.constant 1 : index
    // Unit-stride, unit-dilation NCHW/FCHW convolution accumulating into the bias-initialised tensor.
    %3589 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3742, %cast_3689 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3586 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Cast the dynamically shaped result to the static 1x256x14x14 type.
    %cast_3749 = tensor.cast %3589 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU-style elementwise clamp of the convolution result %cast_3749.
    // The index constants below are not referenced anywhere in this block.
    %c1_3750 = arith.constant 1 : index
    %c1_3751 = arith.constant 1 : index
    %c256_3752 = arith.constant 256 : index
    %c2_3753 = arith.constant 2 : index
    %c14_3754 = arith.constant 14 : index
    %c3_3755 = arith.constant 3 : index
    %c14_3756 = arith.constant 14 : index
    // Destination tensor; the all-parallel generic below writes every element.
    %3590 = tensor.empty() : tensor<1x256x14x14xf32>
    // #map reads the input with its leading dimension pinned to index 0; #map1 is the identity on the output.
    %3591 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3749 : tensor<1x256x14x14xf32>) outs(%3590 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      // Yields %in when it compares greater than 0.0, otherwise 0.0.
      // `ugt` is an unordered predicate, so a NaN input also selects %in.
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Same-type cast: source and result types are identical.
    %cast_3757 = tensor.cast %3591 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 activation %cast_3757 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 7.8125e-03 (1/128) and zero_point = 0.
    %3592 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3593 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3758 = torch.constant.int 12
    // Scalar scale / zero point. %3595 and %3597 are not used below; the region re-materializes them from %3594 / %3596.
    %3594 = torch.aten.item %3592 : !torch.vtensor<[],f32> -> !torch.float
    %3595 = torch_c.to_f64 %3594
    %3596 = torch.aten.item %3593 : !torch.vtensor<[],si8> -> !torch.int
    %3597 = torch_c.to_i64 %3596
    // The index constants below are not referenced anywhere in this block.
    %c1_3759 = arith.constant 1 : index
    %c1_3760 = arith.constant 1 : index
    %c256_3761 = arith.constant 256 : index
    %c2_3762 = arith.constant 2 : index
    %c14_3763 = arith.constant 14 : index
    %c3_3764 = arith.constant 3 : index
    %c14_3765 = arith.constant 14 : index
    %3598 = tensor.empty() : tensor<1x256x14x14xi8>
    %3599 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3757 : tensor<1x256x14x14xf32>) outs(%3598 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3596
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3594
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Same-type casts: source and result types are identical.
    %cast_3766 = tensor.cast %3599 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_3767 = tensor.cast %cast_3766 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize the i8 activation %cast_3767 back to f32:
    //   x = (q - zero_point) * scale, with scale = 7.8125e-03 (1/128) and zero_point = 0.
    %3600 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3601 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3603 and %3605 are not used below; the region re-materializes them from %3602 / %3604.
    %3602 = torch.aten.item %3600 : !torch.vtensor<[],f32> -> !torch.float
    %3603 = torch_c.to_f64 %3602
    %3604 = torch.aten.item %3601 : !torch.vtensor<[],si8> -> !torch.int
    %3605 = torch_c.to_i64 %3604
    // Same-type cast: source and result types are identical.
    %cast_3768 = tensor.cast %cast_3767 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3769 = arith.constant 1 : index
    %c1_3770 = arith.constant 1 : index
    %c256_3771 = arith.constant 256 : index
    %c2_3772 = arith.constant 2 : index
    %c14_3773 = arith.constant 14 : index
    %c3_3774 = arith.constant 3 : index
    %c14_3775 = arith.constant 14 : index
    %3606 = tensor.empty() : tensor<1x256x14x14xf32>
    %3607 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3768 : tensor<1x256x14x14xi8>) outs(%3606 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3604
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3602
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    // Same-type cast: source and result types are identical.
    %cast_3776 = tensor.cast %3607 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 tensor %134 (1024x256x1x1) to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    // NOTE(review): %134 is defined outside this block; presumably a convolution weight, confirm against its use.
    %3608 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3609 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3777 = torch.constant.int 12
    // Scalar scale / zero point. %3611 and %3613 are not used below; the region re-materializes them from %3610 / %3612.
    %3610 = torch.aten.item %3608 : !torch.vtensor<[],f32> -> !torch.float
    %3611 = torch_c.to_f64 %3610
    %3612 = torch.aten.item %3609 : !torch.vtensor<[],si8> -> !torch.int
    %3613 = torch_c.to_i64 %3612
    // The index constants below are not referenced anywhere in this block.
    %c1_3778 = arith.constant 1 : index
    %c0_3779 = arith.constant 0 : index
    %c1024_3780 = arith.constant 1024 : index
    %c1_3781 = arith.constant 1 : index
    %c256_3782 = arith.constant 256 : index
    %3614 = tensor.empty() : tensor<1024x256x1x1xi8>
    // #map4 reads the input with its two trailing (size-1) dimensions pinned to index 0.
    %3615 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%134 : tensor<1024x256x1x1xf32>) outs(%3614 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3612
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3610
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    // Same-type casts: source and result types are identical.
    %cast_3783 = tensor.cast %3615 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_3784 = tensor.cast %cast_3783 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize the i8 tensor %cast_3784 (1024x256x1x1) back to f32:
    //   x = (q - zero_point) * scale, with scale = 3.90625e-03 (1/256) and zero_point = 0.
    %3616 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3617 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3619 and %3621 are not used below; the region re-materializes them from %3618 / %3620.
    %3618 = torch.aten.item %3616 : !torch.vtensor<[],f32> -> !torch.float
    %3619 = torch_c.to_f64 %3618
    %3620 = torch.aten.item %3617 : !torch.vtensor<[],si8> -> !torch.int
    %3621 = torch_c.to_i64 %3620
    // Same-type cast: source and result types are identical.
    %cast_3785 = tensor.cast %cast_3784 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3786 = arith.constant 1 : index
    %c0_3787 = arith.constant 0 : index
    %c1024_3788 = arith.constant 1024 : index
    %c1_3789 = arith.constant 1 : index
    %c256_3790 = arith.constant 256 : index
    %3622 = tensor.empty() : tensor<1024x256x1x1xf32>
    // #map4 reads the input with its two trailing (size-1) dimensions pinned to index 0.
    %3623 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3785 : tensor<1024x256x1x1xi8>) outs(%3622 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3620
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3618
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    // Same-type cast: source and result types are identical.
    %cast_3791 = tensor.cast %3623 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize the 1024-element f32 vector %136 to i8:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    // NOTE(review): %136 is defined outside this block; presumably a convolution bias, confirm against its use.
    %3624 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3625 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized int8 type; it is not used in this block.
    %int12_3792 = torch.constant.int 12
    // Scalar scale / zero point. %3627 and %3629 are not used below; the region re-materializes them from %3626 / %3628.
    %3626 = torch.aten.item %3624 : !torch.vtensor<[],f32> -> !torch.float
    %3627 = torch_c.to_f64 %3626
    %3628 = torch.aten.item %3625 : !torch.vtensor<[],si8> -> !torch.int
    %3629 = torch_c.to_i64 %3628
    // The index constants below are not referenced anywhere in this block.
    %c1_3793 = arith.constant 1 : index
    %c0_3794 = arith.constant 0 : index
    %c1024_3795 = arith.constant 1024 : index
    %3630 = tensor.empty() : tensor<1024xi8>
    %3631 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%136 : tensor<1024xf32>) outs(%3630 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale narrowed from f64 to f32.
      %5774 = torch_c.to_i64 %3628
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %3626
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round halfway cases to the nearest even integer, as ONNX QuantizeLinear
      // specifies; math.round would round them away from zero instead.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Same-type casts: source and result types are identical.
    %cast_3796 = tensor.cast %3631 : tensor<1024xi8> to tensor<1024xi8>
    %cast_3797 = tensor.cast %cast_3796 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize the 1024-element i8 vector %cast_3797 back to f32:
    //   x = (q - zero_point) * scale, with scale = 3.90625e-03 (1/256) and zero_point = 0.
    %3632 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3633 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Scalar scale / zero point. %3635 and %3637 are not used below; the region re-materializes them from %3634 / %3636.
    %3634 = torch.aten.item %3632 : !torch.vtensor<[],f32> -> !torch.float
    %3635 = torch_c.to_f64 %3634
    %3636 = torch.aten.item %3633 : !torch.vtensor<[],si8> -> !torch.int
    %3637 = torch_c.to_i64 %3636
    // Same-type cast: source and result types are identical.
    %cast_3798 = tensor.cast %cast_3797 : tensor<1024xi8> to tensor<1024xi8>
    // The index constants below are not referenced anywhere in this block.
    %c1_3799 = arith.constant 1 : index
    %c0_3800 = arith.constant 0 : index
    %c1024_3801 = arith.constant 1024 : index
    %3638 = tensor.empty() : tensor<1024xf32>
    %3639 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3798 : tensor<1024xi8>) outs(%3638 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in 8 bits.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3636
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale narrowed from f64 to f32, then applied.
      %5779 = torch_c.to_f64 %3634
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    // Same-type cast: source and result types are identical.
    %cast_3802 = tensor.cast %3639 : tensor<1024xf32> to tensor<1024xf32>
    %int0_3803 = torch.constant.int 0
    %int0_3804 = torch.constant.int 0
    %int1_3805 = torch.constant.int 1
    %int1_3806 = torch.constant.int 1
    %int1_3807 = torch.constant.int 1
    %int1_3808 = torch.constant.int 1
    %int0_3809 = torch.constant.int 0
    %3640 = torch.prim.ListConstruct %int0_3803, %int0_3804 : (!torch.int, !torch.int) -> !torch.list<int>
    %3641 = torch.prim.ListConstruct %int1_3805, %int1_3806 : (!torch.int, !torch.int) -> !torch.list<int>
    %3642 = torch.prim.ListConstruct %int1_3807, %int1_3808 : (!torch.int, !torch.int) -> !torch.list<int>
    %3643 = torch.prim.ListConstruct %int0_3809, %int0_3809 : (!torch.int, !torch.int) -> !torch.list<int>
    // 2-D convolution (NCHW activation, FCHW kernel): the 1x256x14x14 tensor
    // %cast_3776 is convolved with the 1024x256x1x1 kernel %cast_3791 and the
    // 1024-element bias %cast_3802, with stride 1 and dilation 1.
    // NOTE(review): %int0_3803, %int0_3804 and %int0_3809 are defined above
    // this chunk; their names suggest the constant 0 -- confirm.
    %false_3810 = torch.constant.bool false
    %int1_3811 = torch.constant.int 1
    // %3644 is the group count (1); %3645 / %3646 are the padding amounts
    // applied to dims 2 and 3 of the input further down.
    %3644 = torch_c.to_i64 %int1_3811
    %3645 = torch_c.to_i64 %int0_3803
    %3646 = torch_c.to_i64 %int0_3804
    %3647 = torch_c.to_i64 %int0_3809
    %3648 = torch_c.to_i64 %int0_3809
    // Index constants describing the operand shapes (dim index / dim size).
    %c0_3812 = arith.constant 0 : index
    %c1_3813 = arith.constant 1 : index
    %c1_3814 = arith.constant 1 : index
    %c256_3815 = arith.constant 256 : index
    %c2_3816 = arith.constant 2 : index
    %c14_3817 = arith.constant 14 : index
    %c3_3818 = arith.constant 3 : index
    %c14_3819 = arith.constant 14 : index
    %c0_3820 = arith.constant 0 : index
    %c1024_3821 = arith.constant 1024 : index
    %c1_3822 = arith.constant 1 : index
    %c256_3823 = arith.constant 256 : index
    %c2_3824 = arith.constant 2 : index
    %c1_3825 = arith.constant 1 : index
    %c3_3826 = arith.constant 3 : index
    %c1_3827 = arith.constant 1 : index
    // Runtime checks: the group count must divide both the input channel
    // count (256) and the kernel's leading dimension (1024).
    %3649 = arith.index_cast %3644 : i64 to index
    %c0_3828 = arith.constant 0 : index
    %3650 = arith.remsi %c256_3815, %3649 : index
    %3651 = arith.cmpi eq, %c0_3828, %3650 : index
    cf.assert %3651, "invalid: groups must divide input channel size evenly."
    %c0_3829 = arith.constant 0 : index
    %3652 = arith.remsi %c1024_3821, %3649 : index
    %3653 = arith.cmpi eq, %c0_3829, %3652 : index
    cf.assert %3653, "invalid: groups must divide weight batch size evenly."
    %c1_i64_3830 = arith.constant 1 : i64
    %c1_i64_3831 = arith.constant 1 : i64
    %c1_i64_3832 = arith.constant 1 : i64
    %c1_i64_3833 = arith.constant 1 : i64
    %cst_3834 = arith.constant 0.000000e+00 : f32
    %c0_3835 = arith.constant 0 : index
    %c1_3836 = arith.constant 1 : index
    %c1_3837 = arith.constant 1 : index
    %c256_3838 = arith.constant 256 : index
    %c2_3839 = arith.constant 2 : index
    %c14_3840 = arith.constant 14 : index
    %c3_3841 = arith.constant 3 : index
    %c14_3842 = arith.constant 14 : index
    // Pad the input with 0.0: dims 0 and 1 get no padding, dims 2 and 3 get
    // %3645 / %3646 elements on both the low and the high side. The result
    // type is fully dynamic because the pad amounts are SSA values.
    %c0_i64_3843 = arith.constant 0 : i64
    %3654 = arith.index_cast %c0_i64_3843 : i64 to index
    %3655 = arith.index_cast %c0_i64_3843 : i64 to index
    %3656 = arith.index_cast %3645 : i64 to index
    %3657 = arith.index_cast %3646 : i64 to index
    %padded_3844 = tensor.pad %cast_3776 low[%3654, %3655, %3656, %3657] high[%3654, %3655, %3656, %3657] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3834 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent along dim 2:
    //   (14 + 2 * pad - 1 * (1 - 1) - 1) floordiv 1 + 1
    // where the multiplier (%c1_i64_3830) and divisor (%c1_i64_3832) are 1.
    %3658 = arith.index_cast %c1_3825 : index to i64
    %c1_i64_3845 = arith.constant 1 : i64
    %c2_i64_3846 = arith.constant 2 : i64
    %3659 = arith.muli %3645, %c2_i64_3846 : i64
    %3660 = arith.index_cast %c14_3817 : index to i64
    %3661 = arith.addi %3660, %3659 : i64
    %3662 = arith.subi %3658, %c1_i64_3845 : i64
    %3663 = arith.muli %c1_i64_3830, %3662 : i64
    %3664 = arith.subi %3661, %3663 : i64
    %3665 = arith.subi %3664, %c1_i64_3845 : i64
    %3666 = arith.floordivsi %3665, %c1_i64_3832 : i64
    %3667 = arith.addi %3666, %c1_i64_3845 : i64
    %3668 = arith.index_cast %3667 : i64 to index
    // Same computation for the output extent along dim 3.
    %3669 = arith.index_cast %c1_3827 : index to i64
    %c1_i64_3847 = arith.constant 1 : i64
    %c2_i64_3848 = arith.constant 2 : i64
    %3670 = arith.muli %3646, %c2_i64_3848 : i64
    %3671 = arith.index_cast %c14_3819 : index to i64
    %3672 = arith.addi %3671, %3670 : i64
    %3673 = arith.subi %3669, %c1_i64_3847 : i64
    %3674 = arith.muli %c1_i64_3831, %3673 : i64
    %3675 = arith.subi %3672, %3674 : i64
    %3676 = arith.subi %3675, %c1_i64_3847 : i64
    %3677 = arith.floordivsi %3676, %c1_i64_3833 : i64
    %3678 = arith.addi %3677, %c1_i64_3847 : i64
    %3679 = arith.index_cast %3678 : i64 to index
    // Fill the output buffer with the bias broadcast along dim 1 (#map3
    // selects d1); the convolution below takes this buffer as its `outs`.
    %3680 = tensor.empty(%3668, %3679) : tensor<1x1024x?x?xf32>
    %3681 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3802 : tensor<1024xf32>) outs(%3680 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    %3682 = arith.floordivsi %c256_3815, %3649 : index
    %3683 = arith.floordivsi %c1024_3821, %3649 : index
    %c0_3849 = arith.constant 0 : index
    %c1_3850 = arith.constant 1 : index
    %3684 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3844, %cast_3791 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3681 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Re-attach the static 14x14 shape to the two dynamic dims.
    %cast_3851 = tensor.cast %3684 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 1x1024x14x14 f32 tensor %cast_3851 to i8 with scale
    // 3.906250e-03 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3685 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3686 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_3852 = torch.constant.int 12
    // Extract scale (%3687) and zero point (%3689) as scalars.
    %3687 = torch.aten.item %3685 : !torch.vtensor<[],f32> -> !torch.float
    %3688 = torch_c.to_f64 %3687
    %3689 = torch.aten.item %3686 : !torch.vtensor<[],si8> -> !torch.int
    %3690 = torch_c.to_i64 %3689
    %c1_3853 = arith.constant 1 : index
    %c1_3854 = arith.constant 1 : index
    %c1024_3855 = arith.constant 1024 : index
    %c2_3856 = arith.constant 2 : index
    %c14_3857 = arith.constant 14 : index
    %c3_3858 = arith.constant 3 : index
    %c14_3859 = arith.constant 14 : index
    %3691 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3692 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3851 : tensor<1x1024x14x14xf32>) outs(%3691 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3689
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3687
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_3860 = tensor.cast %3692 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3861 = tensor.cast %cast_3860 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize the 1x1024x14x14 i8 tensor %cast_3861 back to f32 with
    // scale 3.906250e-03 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    %3693 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3694 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3695) and zero point (%3697) as scalars.
    %3695 = torch.aten.item %3693 : !torch.vtensor<[],f32> -> !torch.float
    %3696 = torch_c.to_f64 %3695
    %3697 = torch.aten.item %3694 : !torch.vtensor<[],si8> -> !torch.int
    %3698 = torch_c.to_i64 %3697
    %cast_3862 = tensor.cast %cast_3861 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_3863 = arith.constant 1 : index
    %c1_3864 = arith.constant 1 : index
    %c1024_3865 = arith.constant 1024 : index
    %c2_3866 = arith.constant 2 : index
    %c14_3867 = arith.constant 14 : index
    %c3_3868 = arith.constant 3 : index
    %c14_3869 = arith.constant 14 : index
    %3699 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3700 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3862 : tensor<1x1024x14x14xi8>) outs(%3699 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3697
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3695
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3870 = tensor.cast %3700 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Element-wise add of two 1x1024x14x14 f32 tensors:
    //   result = %cast_3870 + alpha * %cast_3564, with alpha = 1 (%3701).
    // %cast_3564 is defined above this chunk.
    %int1_3871 = torch.constant.int 1
    %3701 = torch_c.to_i64 %int1_3871
    %c1_3872 = arith.constant 1 : index
    %c1_3873 = arith.constant 1 : index
    %c1024_3874 = arith.constant 1024 : index
    %c2_3875 = arith.constant 2 : index
    %c14_3876 = arith.constant 14 : index
    %c3_3877 = arith.constant 3 : index
    %c14_3878 = arith.constant 14 : index
    %c1_3879 = arith.constant 1 : index
    %c1024_3880 = arith.constant 1024 : index
    // Broadcast-compatibility checks on dims 1, 2 and 3; both sides of each
    // comparison are constants (1024 == 1024, 14 == 14, 14 == 14).
    %3702 = arith.cmpi eq, %c1024_3874, %c1024_3880 : index
    cf.assert %3702, "mismatched size for broadcast"
    %c2_3881 = arith.constant 2 : index
    %c14_3882 = arith.constant 14 : index
    %3703 = arith.cmpi eq, %c14_3876, %c14_3882 : index
    cf.assert %3703, "mismatched size for broadcast"
    %c3_3883 = arith.constant 3 : index
    %c14_3884 = arith.constant 14 : index
    %3704 = arith.cmpi eq, %c14_3878, %c14_3884 : index
    cf.assert %3704, "mismatched size for broadcast"
    %3705 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3706 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3870, %cast_3564 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%3705 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // alpha as f32, then in + in_6197 * alpha.
      %5774 = arith.sitofp %3701 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3885 = tensor.cast %3706 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // ReLU over the 1x1024x14x14 f32 tensor %cast_3885: keeps elements
    // greater than 0.0 and replaces the rest with 0.0.
    %c1_3886 = arith.constant 1 : index
    %c1_3887 = arith.constant 1 : index
    %c1024_3888 = arith.constant 1024 : index
    %c2_3889 = arith.constant 2 : index
    %c14_3890 = arith.constant 14 : index
    %c3_3891 = arith.constant 3 : index
    %c14_3892 = arith.constant 14 : index
    %3707 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3708 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3885 : tensor<1x1024x14x14xf32>) outs(%3707 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is an unordered compare: it is also true when %in is NaN, so a
      // NaN input is passed through rather than replaced by 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3893 = tensor.cast %3708 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 1x1024x14x14 f32 tensor %cast_3893 to i8 with scale
    // 1.562500e-02 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3709 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3710 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_3894 = torch.constant.int 12
    // Extract scale (%3711) and zero point (%3713) as scalars.
    %3711 = torch.aten.item %3709 : !torch.vtensor<[],f32> -> !torch.float
    %3712 = torch_c.to_f64 %3711
    %3713 = torch.aten.item %3710 : !torch.vtensor<[],si8> -> !torch.int
    %3714 = torch_c.to_i64 %3713
    %c1_3895 = arith.constant 1 : index
    %c1_3896 = arith.constant 1 : index
    %c1024_3897 = arith.constant 1024 : index
    %c2_3898 = arith.constant 2 : index
    %c14_3899 = arith.constant 14 : index
    %c3_3900 = arith.constant 3 : index
    %c14_3901 = arith.constant 14 : index
    %3715 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3716 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3893 : tensor<1x1024x14x14xf32>) outs(%3715 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3713
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3711
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_3902 = tensor.cast %3716 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_3903 = tensor.cast %cast_3902 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize the 1x1024x14x14 i8 tensor %cast_3903 back to f32 with
    // scale 1.562500e-02 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    %3717 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3718 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3719) and zero point (%3721) as scalars.
    %3719 = torch.aten.item %3717 : !torch.vtensor<[],f32> -> !torch.float
    %3720 = torch_c.to_f64 %3719
    %3721 = torch.aten.item %3718 : !torch.vtensor<[],si8> -> !torch.int
    %3722 = torch_c.to_i64 %3721
    %cast_3904 = tensor.cast %cast_3903 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_3905 = arith.constant 1 : index
    %c1_3906 = arith.constant 1 : index
    %c1024_3907 = arith.constant 1024 : index
    %c2_3908 = arith.constant 2 : index
    %c14_3909 = arith.constant 14 : index
    %c3_3910 = arith.constant 3 : index
    %c14_3911 = arith.constant 14 : index
    %3723 = tensor.empty() : tensor<1x1024x14x14xf32>
    %3724 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3904 : tensor<1x1024x14x14xi8>) outs(%3723 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3721
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3719
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_3912 = tensor.cast %3724 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 256x1024x1x1 f32 tensor %138 (defined above this chunk)
    // to i8 with scale 0.001953125 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3725 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3726 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_3913 = torch.constant.int 12
    // Extract scale (%3727) and zero point (%3729) as scalars.
    %3727 = torch.aten.item %3725 : !torch.vtensor<[],f32> -> !torch.float
    %3728 = torch_c.to_f64 %3727
    %3729 = torch.aten.item %3726 : !torch.vtensor<[],si8> -> !torch.int
    %3730 = torch_c.to_i64 %3729
    %c1_3914 = arith.constant 1 : index
    %c0_3915 = arith.constant 0 : index
    %c256_3916 = arith.constant 256 : index
    %c1_3917 = arith.constant 1 : index
    %c1024_3918 = arith.constant 1024 : index
    %3731 = tensor.empty() : tensor<256x1024x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0): the two trailing dims are
    // size 1, so their index is fixed at 0.
    %3732 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%138 : tensor<256x1024x1x1xf32>) outs(%3731 : tensor<256x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3729
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3727
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x1024x1x1xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_3919 = tensor.cast %3732 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %cast_3920 = tensor.cast %cast_3919 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Dequantize the 256x1024x1x1 i8 tensor %cast_3920 back to f32 with
    // scale 0.001953125 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    // The result %cast_3927 is the kernel operand of the convolution below.
    %3733 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %3734 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3735) and zero point (%3737) as scalars.
    %3735 = torch.aten.item %3733 : !torch.vtensor<[],f32> -> !torch.float
    %3736 = torch_c.to_f64 %3735
    %3737 = torch.aten.item %3734 : !torch.vtensor<[],si8> -> !torch.int
    %3738 = torch_c.to_i64 %3737
    %cast_3921 = tensor.cast %cast_3920 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %c1_3922 = arith.constant 1 : index
    %c0_3923 = arith.constant 0 : index
    %c256_3924 = arith.constant 256 : index
    %c1_3925 = arith.constant 1 : index
    %c1024_3926 = arith.constant 1024 : index
    %3739 = tensor.empty() : tensor<256x1024x1x1xf32>
    %3740 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3921 : tensor<256x1024x1x1xi8>) outs(%3739 : tensor<256x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3737
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3735
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x1024x1x1xf32>
    %cast_3927 = tensor.cast %3740 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
    // Quantize the 256-element f32 tensor %140 (defined above this chunk) to
    // i8 with scale 3.906250e-03 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3741 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3742 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_3928 = torch.constant.int 12
    // Extract scale (%3743) and zero point (%3745) as scalars.
    %3743 = torch.aten.item %3741 : !torch.vtensor<[],f32> -> !torch.float
    %3744 = torch_c.to_f64 %3743
    %3745 = torch.aten.item %3742 : !torch.vtensor<[],si8> -> !torch.int
    %3746 = torch_c.to_i64 %3745
    %c1_3929 = arith.constant 1 : index
    %c0_3930 = arith.constant 0 : index
    %c256_3931 = arith.constant 256 : index
    %3747 = tensor.empty() : tensor<256xi8>
    %3748 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%140 : tensor<256xf32>) outs(%3747 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3745
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3743
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_3932 = tensor.cast %3748 : tensor<256xi8> to tensor<256xi8>
    %cast_3933 = tensor.cast %cast_3932 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the 256-element i8 tensor %cast_3933 back to f32 with scale
    // 3.906250e-03 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    // The result %cast_3938 is the bias operand of the convolution below.
    %3749 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3750 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3751) and zero point (%3753) as scalars.
    %3751 = torch.aten.item %3749 : !torch.vtensor<[],f32> -> !torch.float
    %3752 = torch_c.to_f64 %3751
    %3753 = torch.aten.item %3750 : !torch.vtensor<[],si8> -> !torch.int
    %3754 = torch_c.to_i64 %3753
    %cast_3934 = tensor.cast %cast_3933 : tensor<256xi8> to tensor<256xi8>
    %c1_3935 = arith.constant 1 : index
    %c0_3936 = arith.constant 0 : index
    %c256_3937 = arith.constant 256 : index
    %3755 = tensor.empty() : tensor<256xf32>
    %3756 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_3934 : tensor<256xi8>) outs(%3755 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3753
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3751
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_3938 = tensor.cast %3756 : tensor<256xf32> to tensor<256xf32>
    // 2-D convolution (NCHW activation, FCHW kernel): the 1x1024x14x14 tensor
    // %cast_3912 is convolved with the 256x1024x1x1 kernel %cast_3927 and the
    // 256-element bias %cast_3938, with stride 1, dilation 1, zero padding
    // and a group count of 1. The result is %cast_3987 (1x256x14x14).
    %int0_3939 = torch.constant.int 0
    %int0_3940 = torch.constant.int 0
    %int1_3941 = torch.constant.int 1
    %int1_3942 = torch.constant.int 1
    %int1_3943 = torch.constant.int 1
    %int1_3944 = torch.constant.int 1
    %int0_3945 = torch.constant.int 0
    // Integer lists [0, 0], [1, 1], [1, 1], [0, 0]; the list values are not
    // read again in this chunk, only the scalar constants are.
    // NOTE(review): presumably the padding / dilation / stride / output-padding
    // operands of the original torch convolution op -- confirm the order.
    %3757 = torch.prim.ListConstruct %int0_3939, %int0_3940 : (!torch.int, !torch.int) -> !torch.list<int>
    %3758 = torch.prim.ListConstruct %int1_3941, %int1_3942 : (!torch.int, !torch.int) -> !torch.list<int>
    %3759 = torch.prim.ListConstruct %int1_3943, %int1_3944 : (!torch.int, !torch.int) -> !torch.list<int>
    %3760 = torch.prim.ListConstruct %int0_3945, %int0_3945 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_3946 = torch.constant.bool false
    %int1_3947 = torch.constant.int 1
    // %3761 is the group count (1); %3762 / %3763 are the padding amounts (0)
    // applied to dims 2 and 3 of the input further down.
    %3761 = torch_c.to_i64 %int1_3947
    %3762 = torch_c.to_i64 %int0_3939
    %3763 = torch_c.to_i64 %int0_3940
    %3764 = torch_c.to_i64 %int0_3945
    %3765 = torch_c.to_i64 %int0_3945
    // Index constants describing the operand shapes (dim index / dim size).
    %c0_3948 = arith.constant 0 : index
    %c1_3949 = arith.constant 1 : index
    %c1_3950 = arith.constant 1 : index
    %c1024_3951 = arith.constant 1024 : index
    %c2_3952 = arith.constant 2 : index
    %c14_3953 = arith.constant 14 : index
    %c3_3954 = arith.constant 3 : index
    %c14_3955 = arith.constant 14 : index
    %c0_3956 = arith.constant 0 : index
    %c256_3957 = arith.constant 256 : index
    %c1_3958 = arith.constant 1 : index
    %c1024_3959 = arith.constant 1024 : index
    %c2_3960 = arith.constant 2 : index
    %c1_3961 = arith.constant 1 : index
    %c3_3962 = arith.constant 3 : index
    %c1_3963 = arith.constant 1 : index
    // Runtime checks: the group count must divide both the input channel
    // count (1024) and the kernel's leading dimension (256).
    %3766 = arith.index_cast %3761 : i64 to index
    %c0_3964 = arith.constant 0 : index
    %3767 = arith.remsi %c1024_3951, %3766 : index
    %3768 = arith.cmpi eq, %c0_3964, %3767 : index
    cf.assert %3768, "invalid: groups must divide input channel size evenly."
    %c0_3965 = arith.constant 0 : index
    %3769 = arith.remsi %c256_3957, %3766 : index
    %3770 = arith.cmpi eq, %c0_3965, %3769 : index
    cf.assert %3770, "invalid: groups must divide weight batch size evenly."
    %c1_i64_3966 = arith.constant 1 : i64
    %c1_i64_3967 = arith.constant 1 : i64
    %c1_i64_3968 = arith.constant 1 : i64
    %c1_i64_3969 = arith.constant 1 : i64
    %cst_3970 = arith.constant 0.000000e+00 : f32
    %c0_3971 = arith.constant 0 : index
    %c1_3972 = arith.constant 1 : index
    %c1_3973 = arith.constant 1 : index
    %c1024_3974 = arith.constant 1024 : index
    %c2_3975 = arith.constant 2 : index
    %c14_3976 = arith.constant 14 : index
    %c3_3977 = arith.constant 3 : index
    %c14_3978 = arith.constant 14 : index
    // Pad the input with 0.0 by %3762 / %3763 (both 0) on dims 2 and 3; dims
    // 0 and 1 get no padding. The result type is fully dynamic because the
    // pad amounts are SSA values.
    %c0_i64_3979 = arith.constant 0 : i64
    %3771 = arith.index_cast %c0_i64_3979 : i64 to index
    %3772 = arith.index_cast %c0_i64_3979 : i64 to index
    %3773 = arith.index_cast %3762 : i64 to index
    %3774 = arith.index_cast %3763 : i64 to index
    %padded_3980 = tensor.pad %cast_3912 low[%3771, %3772, %3773, %3774] high[%3771, %3772, %3773, %3774] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_3970 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent along dim 2:
    //   (14 + 2 * 0 - 1 * (1 - 1) - 1) floordiv 1 + 1 = 14
    %3775 = arith.index_cast %c1_3961 : index to i64
    %c1_i64_3981 = arith.constant 1 : i64
    %c2_i64_3982 = arith.constant 2 : i64
    %3776 = arith.muli %3762, %c2_i64_3982 : i64
    %3777 = arith.index_cast %c14_3953 : index to i64
    %3778 = arith.addi %3777, %3776 : i64
    %3779 = arith.subi %3775, %c1_i64_3981 : i64
    %3780 = arith.muli %c1_i64_3966, %3779 : i64
    %3781 = arith.subi %3778, %3780 : i64
    %3782 = arith.subi %3781, %c1_i64_3981 : i64
    %3783 = arith.floordivsi %3782, %c1_i64_3968 : i64
    %3784 = arith.addi %3783, %c1_i64_3981 : i64
    %3785 = arith.index_cast %3784 : i64 to index
    // Same computation for the output extent along dim 3.
    %3786 = arith.index_cast %c1_3963 : index to i64
    %c1_i64_3983 = arith.constant 1 : i64
    %c2_i64_3984 = arith.constant 2 : i64
    %3787 = arith.muli %3763, %c2_i64_3984 : i64
    %3788 = arith.index_cast %c14_3955 : index to i64
    %3789 = arith.addi %3788, %3787 : i64
    %3790 = arith.subi %3786, %c1_i64_3983 : i64
    %3791 = arith.muli %c1_i64_3967, %3790 : i64
    %3792 = arith.subi %3789, %3791 : i64
    %3793 = arith.subi %3792, %c1_i64_3983 : i64
    %3794 = arith.floordivsi %3793, %c1_i64_3969 : i64
    %3795 = arith.addi %3794, %c1_i64_3983 : i64
    %3796 = arith.index_cast %3795 : i64 to index
    // Fill the output buffer with the bias broadcast along dim 1 (#map3
    // selects d1); the convolution below takes this buffer as its `outs`.
    %3797 = tensor.empty(%3785, %3796) : tensor<1x256x?x?xf32>
    %3798 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3938 : tensor<256xf32>) outs(%3797 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    %3799 = arith.floordivsi %c1024_3951, %3766 : index
    %3800 = arith.floordivsi %c256_3957, %3766 : index
    %c0_3985 = arith.constant 0 : index
    %c1_3986 = arith.constant 1 : index
    %3801 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_3980, %cast_3927 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%3798 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Re-attach the static 14x14 shape to the two dynamic dims.
    %cast_3987 = tensor.cast %3801 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU over the 1x256x14x14 f32 tensor %cast_3987: keeps elements
    // greater than 0.0 and replaces the rest with 0.0.
    %c1_3988 = arith.constant 1 : index
    %c1_3989 = arith.constant 1 : index
    %c256_3990 = arith.constant 256 : index
    %c2_3991 = arith.constant 2 : index
    %c14_3992 = arith.constant 14 : index
    %c3_3993 = arith.constant 3 : index
    %c14_3994 = arith.constant 14 : index
    %3802 = tensor.empty() : tensor<1x256x14x14xf32>
    %3803 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3987 : tensor<1x256x14x14xf32>) outs(%3802 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is an unordered compare: it is also true when %in is NaN, so a
      // NaN input is passed through rather than replaced by 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_3995 = tensor.cast %3803 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 1x256x14x14 f32 tensor %cast_3995 to i8 with scale
    // 7.812500e-03 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3804 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3805 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_3996 = torch.constant.int 12
    // Extract scale (%3806) and zero point (%3808) as scalars.
    %3806 = torch.aten.item %3804 : !torch.vtensor<[],f32> -> !torch.float
    %3807 = torch_c.to_f64 %3806
    %3808 = torch.aten.item %3805 : !torch.vtensor<[],si8> -> !torch.int
    %3809 = torch_c.to_i64 %3808
    %c1_3997 = arith.constant 1 : index
    %c1_3998 = arith.constant 1 : index
    %c256_3999 = arith.constant 256 : index
    %c2_4000 = arith.constant 2 : index
    %c14_4001 = arith.constant 14 : index
    %c3_4002 = arith.constant 3 : index
    %c14_4003 = arith.constant 14 : index
    %3810 = tensor.empty() : tensor<1x256x14x14xi8>
    %3811 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_3995 : tensor<1x256x14x14xf32>) outs(%3810 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3808
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3806
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_4004 = tensor.cast %3811 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4005 = tensor.cast %cast_4004 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize the 1x256x14x14 i8 tensor %cast_4005 back to f32 with scale
    // 7.812500e-03 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    %3812 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3813 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3814) and zero point (%3816) as scalars.
    %3814 = torch.aten.item %3812 : !torch.vtensor<[],f32> -> !torch.float
    %3815 = torch_c.to_f64 %3814
    %3816 = torch.aten.item %3813 : !torch.vtensor<[],si8> -> !torch.int
    %3817 = torch_c.to_i64 %3816
    %cast_4006 = tensor.cast %cast_4005 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %c1_4007 = arith.constant 1 : index
    %c1_4008 = arith.constant 1 : index
    %c256_4009 = arith.constant 256 : index
    %c2_4010 = arith.constant 2 : index
    %c14_4011 = arith.constant 14 : index
    %c3_4012 = arith.constant 3 : index
    %c14_4013 = arith.constant 14 : index
    %3818 = tensor.empty() : tensor<1x256x14x14xf32>
    %3819 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4006 : tensor<1x256x14x14xi8>) outs(%3818 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3816
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3814
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4014 = tensor.cast %3819 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 256x256x3x3 f32 tensor %142 (defined above this chunk) to
    // i8 with scale 3.906250e-03 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3820 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3821 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_4015 = torch.constant.int 12
    // Extract scale (%3822) and zero point (%3824) as scalars.
    %3822 = torch.aten.item %3820 : !torch.vtensor<[],f32> -> !torch.float
    %3823 = torch_c.to_f64 %3822
    %3824 = torch.aten.item %3821 : !torch.vtensor<[],si8> -> !torch.int
    %3825 = torch_c.to_i64 %3824
    %c1_4016 = arith.constant 1 : index
    %c0_4017 = arith.constant 0 : index
    %c256_4018 = arith.constant 256 : index
    %c1_4019 = arith.constant 1 : index
    %c256_4020 = arith.constant 256 : index
    %c2_4021 = arith.constant 2 : index
    %c3_4022 = arith.constant 3 : index
    %c3_4023 = arith.constant 3 : index
    %c3_4024 = arith.constant 3 : index
    %3826 = tensor.empty() : tensor<256x256x3x3xi8>
    %3827 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%142 : tensor<256x256x3x3xf32>) outs(%3826 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3824
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3822
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_4025 = tensor.cast %3827 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_4026 = tensor.cast %cast_4025 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize the 256x256x3x3 i8 tensor %cast_4026 back to f32 with scale
    // 3.906250e-03 and zero point 0:
    //   x = (sext(q) - zero_point) * scale
    // NOTE(review): %cast_4037 is presumably the kernel of a following 3x3
    // convolution; its use lies past the end of this chunk -- confirm.
    %3828 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3829 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Extract scale (%3830) and zero point (%3832) as scalars.
    %3830 = torch.aten.item %3828 : !torch.vtensor<[],f32> -> !torch.float
    %3831 = torch_c.to_f64 %3830
    %3832 = torch.aten.item %3829 : !torch.vtensor<[],si8> -> !torch.int
    %3833 = torch_c.to_i64 %3832
    %cast_4027 = tensor.cast %cast_4026 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %c1_4028 = arith.constant 1 : index
    %c0_4029 = arith.constant 0 : index
    %c256_4030 = arith.constant 256 : index
    %c1_4031 = arith.constant 1 : index
    %c256_4032 = arith.constant 256 : index
    %c2_4033 = arith.constant 2 : index
    %c3_4034 = arith.constant 3 : index
    %c3_4035 = arith.constant 3 : index
    %c3_4036 = arith.constant 3 : index
    %3834 = tensor.empty() : tensor<256x256x3x3xf32>
    %3835 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4027 : tensor<256x256x3x3xi8>) outs(%3834 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3832
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (narrowed from f64 to f32).
      %5779 = torch_c.to_f64 %3830
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    %cast_4037 = tensor.cast %3835 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize the 256-element f32 tensor %144 (defined above this chunk) to
    // i8 with scale 7.812500e-03 and zero point 0:
    //   q = clamp(roundeven(x / scale) + zero_point, -128, 127)
    // Fix: the rounding step uses math.roundeven (ties to even) instead of
    // math.round (ties away from zero), which is the rounding ONNX
    // QuantizeLinear specifies; the two differ only on exact .5 ties.
    %3836 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3837 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_4038 = torch.constant.int 12
    // Extract scale (%3838) and zero point (%3840) as scalars.
    %3838 = torch.aten.item %3836 : !torch.vtensor<[],f32> -> !torch.float
    %3839 = torch_c.to_f64 %3838
    %3840 = torch.aten.item %3837 : !torch.vtensor<[],si8> -> !torch.int
    %3841 = torch_c.to_i64 %3840
    %c1_4039 = arith.constant 1 : index
    %c0_4040 = arith.constant 0 : index
    %c256_4041 = arith.constant 256 : index
    %3842 = tensor.empty() : tensor<256xi8>
    %3843 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%144 : tensor<256xf32>) outs(%3842 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3840
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3838
      %5777 = arith.truncf %5776 : f64 to f32
      // x / scale, rounded to nearest with ties to even, plus zero point.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts; they do not change the value or its type.
    %cast_4042 = tensor.cast %3843 : tensor<256xi8> to tensor<256xi8>
    %cast_4043 = tensor.cast %cast_4042 : tensor<256xi8> to tensor<256xi8>
    // Dequantize the 256-element i8 tensor %cast_4043 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 7.8125e-03 and zero_point = 0. The result, %cast_4048, is the
    // bias that is broadcast into the 3x3 convolution's accumulator below.
    %3844 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3845 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3846 = torch.aten.item %3844 : !torch.vtensor<[],f32> -> !torch.float
    %3847 = torch_c.to_f64 %3846
    %3848 = torch.aten.item %3845 : !torch.vtensor<[],si8> -> !torch.int
    %3849 = torch_c.to_i64 %3848
    %cast_4044 = tensor.cast %cast_4043 : tensor<256xi8> to tensor<256xi8>
    %c1_4045 = arith.constant 1 : index
    %c0_4046 = arith.constant 0 : index
    %c256_4047 = arith.constant 256 : index
    %3850 = tensor.empty() : tensor<256xf32>
    %3851 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4044 : tensor<256xi8>) outs(%3850 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3848
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3846
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_4048 = tensor.cast %3851 : tensor<256xf32> to tensor<256xf32>
    // 3x3 convolution of %cast_4014 (1x256x14x14 f32, defined above this excerpt)
    // with filter %cast_4037 (256x256x3x3) and bias %cast_4048 (256).
    // Steps: check the group count, zero-pad H and W by 1 on each side, compute
    // the output spatial size, broadcast the bias into the accumulator, then run
    // linalg.conv_2d_nchw_fchw with unit strides and dilations.
    %int1_4049 = torch.constant.int 1
    %int1_4050 = torch.constant.int 1
    %int1_4051 = torch.constant.int 1
    %int1_4052 = torch.constant.int 1
    %int1_4053 = torch.constant.int 1
    %int1_4054 = torch.constant.int 1
    %int0_4055 = torch.constant.int 0
    // %3852 = [1, 1]: its elements (%int1_4049, %int1_4050) become the H/W
    // padding amounts below. %3853, %3854 and %3855 are not referenced in the
    // lines shown here; presumably the stride / dilation / output-padding
    // lists - TODO confirm.
    %3852 = torch.prim.ListConstruct %int1_4049, %int1_4050 : (!torch.int, !torch.int) -> !torch.list<int>
    %3853 = torch.prim.ListConstruct %int1_4051, %int1_4052 : (!torch.int, !torch.int) -> !torch.list<int>
    %3854 = torch.prim.ListConstruct %int1_4053, %int1_4054 : (!torch.int, !torch.int) -> !torch.list<int>
    %3855 = torch.prim.ListConstruct %int0_4055, %int0_4055 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4056 = torch.constant.bool false
    %int1_4057 = torch.constant.int 1
    // %3856 (= 1) is the group count checked by the asserts below.
    %3856 = torch_c.to_i64 %int1_4057
    %3857 = torch_c.to_i64 %int1_4049
    %3858 = torch_c.to_i64 %int1_4050
    %3859 = torch_c.to_i64 %int0_4055
    %3860 = torch_c.to_i64 %int0_4055
    %c0_4058 = arith.constant 0 : index
    %c1_4059 = arith.constant 1 : index
    %c1_4060 = arith.constant 1 : index
    %c256_4061 = arith.constant 256 : index
    %c2_4062 = arith.constant 2 : index
    %c14_4063 = arith.constant 14 : index
    %c3_4064 = arith.constant 3 : index
    %c14_4065 = arith.constant 14 : index
    %c0_4066 = arith.constant 0 : index
    %c256_4067 = arith.constant 256 : index
    %c1_4068 = arith.constant 1 : index
    %c256_4069 = arith.constant 256 : index
    %c2_4070 = arith.constant 2 : index
    %c3_4071 = arith.constant 3 : index
    %c3_4072 = arith.constant 3 : index
    %c3_4073 = arith.constant 3 : index
    // Runtime checks: 256 rem groups must be 0 for both the input channel count
    // and the filter's leading dimension.
    %3861 = arith.index_cast %3856 : i64 to index
    %c0_4074 = arith.constant 0 : index
    %3862 = arith.remsi %c256_4061, %3861 : index
    %3863 = arith.cmpi eq, %c0_4074, %3862 : index
    cf.assert %3863, "invalid: groups must divide input channel size evenly."
    %c0_4075 = arith.constant 0 : index
    %3864 = arith.remsi %c256_4067, %3861 : index
    %3865 = arith.cmpi eq, %c0_4075, %3864 : index
    cf.assert %3865, "invalid: groups must divide weight batch size evenly."
    // In the size computation below, %c1_i64_4076/_4077 multiply (kernel - 1)
    // and %c1_i64_4078/_4079 are the floordiv divisors.
    %c1_i64_4076 = arith.constant 1 : i64
    %c1_i64_4077 = arith.constant 1 : i64
    %c1_i64_4078 = arith.constant 1 : i64
    %c1_i64_4079 = arith.constant 1 : i64
    %cst_4080 = arith.constant 0.000000e+00 : f32
    %c0_4081 = arith.constant 0 : index
    %c1_4082 = arith.constant 1 : index
    %c1_4083 = arith.constant 1 : index
    %c256_4084 = arith.constant 256 : index
    %c2_4085 = arith.constant 2 : index
    %c14_4086 = arith.constant 14 : index
    %c3_4087 = arith.constant 3 : index
    %c14_4088 = arith.constant 14 : index
    %c0_i64_4089 = arith.constant 0 : i64
    // Pad dims 2 and 3 by %3868/%3869 (= 1) at both ends with 0.0; dims 0 and 1
    // get a padding of 0. The padded type is fully dynamic.
    %3866 = arith.index_cast %c0_i64_4089 : i64 to index
    %3867 = arith.index_cast %c0_i64_4089 : i64 to index
    %3868 = arith.index_cast %3857 : i64 to index
    %3869 = arith.index_cast %3858 : i64 to index
    %padded_4090 = tensor.pad %cast_4014 low[%3866, %3867, %3868, %3869] high[%3866, %3867, %3868, %3869] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4080 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output height: floordiv(14 + 2*pad - 1*(3 - 1) - 1, 1) + 1, which
    // evaluates to 14 for these constants.
    %3870 = arith.index_cast %c3_4071 : index to i64
    %c1_i64_4091 = arith.constant 1 : i64
    %c2_i64_4092 = arith.constant 2 : i64
    %3871 = arith.muli %3857, %c2_i64_4092 : i64
    %3872 = arith.index_cast %c14_4063 : index to i64
    %3873 = arith.addi %3872, %3871 : i64
    %3874 = arith.subi %3870, %c1_i64_4091 : i64
    %3875 = arith.muli %c1_i64_4076, %3874 : i64
    %3876 = arith.subi %3873, %3875 : i64
    %3877 = arith.subi %3876, %c1_i64_4091 : i64
    %3878 = arith.floordivsi %3877, %c1_i64_4078 : i64
    %3879 = arith.addi %3878, %c1_i64_4091 : i64
    %3880 = arith.index_cast %3879 : i64 to index
    // Output width: same formula applied to the last dimension.
    %3881 = arith.index_cast %c3_4073 : index to i64
    %c1_i64_4093 = arith.constant 1 : i64
    %c2_i64_4094 = arith.constant 2 : i64
    %3882 = arith.muli %3858, %c2_i64_4094 : i64
    %3883 = arith.index_cast %c14_4065 : index to i64
    %3884 = arith.addi %3883, %3882 : i64
    %3885 = arith.subi %3881, %c1_i64_4093 : i64
    %3886 = arith.muli %c1_i64_4077, %3885 : i64
    %3887 = arith.subi %3884, %3886 : i64
    %3888 = arith.subi %3887, %c1_i64_4093 : i64
    %3889 = arith.floordivsi %3888, %c1_i64_4079 : i64
    %3890 = arith.addi %3889, %c1_i64_4093 : i64
    %3891 = arith.index_cast %3890 : i64 to index
    // Broadcast the 256-element bias along dim 1 (#map3) to initialise the
    // tensor that the convolution writes into.
    %3892 = tensor.empty(%3880, %3891) : tensor<1x256x?x?xf32>
    %3893 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4048 : tensor<256xf32>) outs(%3892 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %3894 and %3895 (channels floordiv groups) are not referenced in the
    // lines shown here.
    %3894 = arith.floordivsi %c256_4061, %3861 : index
    %3895 = arith.floordivsi %c256_4067, %3861 : index
    %c0_4095 = arith.constant 0 : index
    %c1_4096 = arith.constant 1 : index
    // The conv result keeps dynamic H/W; the tensor.cast pins them to 14x14.
    %3896 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4090, %cast_4037 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%3893 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    %cast_4097 = tensor.cast %3896 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // Elementwise ReLU on the convolution output %cast_4097:
    //   out = (in ugt 0.0) ? in : 0.0
    // The index constants below are not referenced in the lines shown here.
    %c1_4098 = arith.constant 1 : index
    %c1_4099 = arith.constant 1 : index
    %c256_4100 = arith.constant 256 : index
    %c2_4101 = arith.constant 2 : index
    %c14_4102 = arith.constant 14 : index
    %c3_4103 = arith.constant 3 : index
    %c14_4104 = arith.constant 14 : index
    %3897 = tensor.empty() : tensor<1x256x14x14xf32>
    %3898 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4097 : tensor<1x256x14x14xf32>) outs(%3897 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so a NaN input is yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4105 = tensor.cast %3898 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 1x256x14x14 f32 activation %cast_4105 to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    // with scale = 7.8125e-03 (1/128) and zero_point = 0.
    // NOTE(review): math.round rounds halfway cases away from zero; if this stage
    // originates from ONNX QuantizeLinear, the spec asks for round-half-to-even,
    // so math.roundeven may be the intended op - confirm against the lowering.
    %3899 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3900 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the lines shown here; presumably a torch dtype code
    // (12 would be qint8) - TODO confirm.
    %int12_4106 = torch.constant.int 12
    %3901 = torch.aten.item %3899 : !torch.vtensor<[],f32> -> !torch.float
    %3902 = torch_c.to_f64 %3901
    %3903 = torch.aten.item %3900 : !torch.vtensor<[],si8> -> !torch.int
    %3904 = torch_c.to_i64 %3903
    %c1_4107 = arith.constant 1 : index
    %c1_4108 = arith.constant 1 : index
    %c256_4109 = arith.constant 256 : index
    %c2_4110 = arith.constant 2 : index
    %c14_4111 = arith.constant 14 : index
    %c3_4112 = arith.constant 3 : index
    %c14_4113 = arith.constant 14 : index
    %3905 = tensor.empty() : tensor<1x256x14x14xi8>
    %3906 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4105 : tensor<1x256x14x14xf32>) outs(%3905 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3903
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3901
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range. ult/ugt are unordered predicates, so a
      // NaN operand also satisfies them.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    %cast_4114 = tensor.cast %3906 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4115 = tensor.cast %cast_4114 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize the 1x256x14x14 i8 activation %cast_4115 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 7.8125e-03 and zero_point = 0. The result, %cast_4124, is the
    // input that gets padded for the 1x1 convolution further down.
    %3907 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3908 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3909 = torch.aten.item %3907 : !torch.vtensor<[],f32> -> !torch.float
    %3910 = torch_c.to_f64 %3909
    %3911 = torch.aten.item %3908 : !torch.vtensor<[],si8> -> !torch.int
    %3912 = torch_c.to_i64 %3911
    %cast_4116 = tensor.cast %cast_4115 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %c1_4117 = arith.constant 1 : index
    %c1_4118 = arith.constant 1 : index
    %c256_4119 = arith.constant 256 : index
    %c2_4120 = arith.constant 2 : index
    %c14_4121 = arith.constant 14 : index
    %c3_4122 = arith.constant 3 : index
    %c14_4123 = arith.constant 14 : index
    %3913 = tensor.empty() : tensor<1x256x14x14xf32>
    %3914 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4116 : tensor<1x256x14x14xi8>) outs(%3913 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3911
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3909
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4124 = tensor.cast %3914 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 1024x256x1x1 f32 tensor %146 (defined above this excerpt) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    // with scale = 3.90625e-03 (1/256) and zero_point = 0. The input is read
    // through #map4, which indexes the two trailing unit dims at constant 0.
    // NOTE(review): math.round rounds halfway cases away from zero; if this stage
    // originates from ONNX QuantizeLinear, the spec asks for round-half-to-even,
    // so math.roundeven may be the intended op - confirm against the lowering.
    %3915 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3916 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the lines shown here; presumably a torch dtype code
    // (12 would be qint8) - TODO confirm.
    %int12_4125 = torch.constant.int 12
    %3917 = torch.aten.item %3915 : !torch.vtensor<[],f32> -> !torch.float
    %3918 = torch_c.to_f64 %3917
    %3919 = torch.aten.item %3916 : !torch.vtensor<[],si8> -> !torch.int
    %3920 = torch_c.to_i64 %3919
    %c1_4126 = arith.constant 1 : index
    %c0_4127 = arith.constant 0 : index
    %c1024_4128 = arith.constant 1024 : index
    %c1_4129 = arith.constant 1 : index
    %c256_4130 = arith.constant 256 : index
    %3921 = tensor.empty() : tensor<1024x256x1x1xi8>
    %3922 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%146 : tensor<1024x256x1x1xf32>) outs(%3921 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3919
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3917
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range. ult/ugt are unordered predicates, so a
      // NaN operand also satisfies them.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    %cast_4131 = tensor.cast %3922 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_4132 = tensor.cast %cast_4131 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize the 1024x256x1x1 i8 tensor %cast_4132 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 and zero_point = 0. The result, %cast_4139, is the
    // filter operand of the 1x1 convolution further down.
    %3923 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3924 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3925 = torch.aten.item %3923 : !torch.vtensor<[],f32> -> !torch.float
    %3926 = torch_c.to_f64 %3925
    %3927 = torch.aten.item %3924 : !torch.vtensor<[],si8> -> !torch.int
    %3928 = torch_c.to_i64 %3927
    %cast_4133 = tensor.cast %cast_4132 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %c1_4134 = arith.constant 1 : index
    %c0_4135 = arith.constant 0 : index
    %c1024_4136 = arith.constant 1024 : index
    %c1_4137 = arith.constant 1 : index
    %c256_4138 = arith.constant 256 : index
    %3929 = tensor.empty() : tensor<1024x256x1x1xf32>
    %3930 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4133 : tensor<1024x256x1x1xi8>) outs(%3929 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3927
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3925
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    %cast_4139 = tensor.cast %3930 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize the 1024-element f32 tensor %148 (defined above this excerpt) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    // NOTE(review): math.round rounds halfway cases away from zero; if this stage
    // originates from ONNX QuantizeLinear, the spec asks for round-half-to-even,
    // so math.roundeven may be the intended op - confirm against the lowering.
    %3931 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3932 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the lines shown here; presumably a torch dtype code
    // (12 would be qint8) - TODO confirm.
    %int12_4140 = torch.constant.int 12
    %3933 = torch.aten.item %3931 : !torch.vtensor<[],f32> -> !torch.float
    %3934 = torch_c.to_f64 %3933
    %3935 = torch.aten.item %3932 : !torch.vtensor<[],si8> -> !torch.int
    %3936 = torch_c.to_i64 %3935
    %c1_4141 = arith.constant 1 : index
    %c0_4142 = arith.constant 0 : index
    %c1024_4143 = arith.constant 1024 : index
    %3937 = tensor.empty() : tensor<1024xi8>
    %3938 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%148 : tensor<1024xf32>) outs(%3937 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3935
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3933
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range. ult/ugt are unordered predicates, so a
      // NaN operand also satisfies them.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    %cast_4144 = tensor.cast %3938 : tensor<1024xi8> to tensor<1024xi8>
    %cast_4145 = tensor.cast %cast_4144 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize the 1024-element i8 tensor %cast_4145 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 and zero_point = 0. The result, %cast_4150, is the
    // bias that is broadcast into the 1x1 convolution's accumulator below.
    %3939 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3940 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %3941 = torch.aten.item %3939 : !torch.vtensor<[],f32> -> !torch.float
    %3942 = torch_c.to_f64 %3941
    %3943 = torch.aten.item %3940 : !torch.vtensor<[],si8> -> !torch.int
    %3944 = torch_c.to_i64 %3943
    %cast_4146 = tensor.cast %cast_4145 : tensor<1024xi8> to tensor<1024xi8>
    %c1_4147 = arith.constant 1 : index
    %c0_4148 = arith.constant 0 : index
    %c1024_4149 = arith.constant 1024 : index
    %3945 = tensor.empty() : tensor<1024xf32>
    %3946 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4146 : tensor<1024xi8>) outs(%3945 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %3943
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %3941
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    %cast_4150 = tensor.cast %3946 : tensor<1024xf32> to tensor<1024xf32>
    // 1x1 convolution of %cast_4124 (1x256x14x14 f32) with filter %cast_4139
    // (1024x256x1x1) and bias %cast_4150 (1024).
    // Steps: check the group count, apply a zero-width pad, compute the output
    // spatial size, broadcast the bias into the accumulator, then run
    // linalg.conv_2d_nchw_fchw with unit strides and dilations.
    %int0_4151 = torch.constant.int 0
    %int0_4152 = torch.constant.int 0
    %int1_4153 = torch.constant.int 1
    %int1_4154 = torch.constant.int 1
    %int1_4155 = torch.constant.int 1
    %int1_4156 = torch.constant.int 1
    %int0_4157 = torch.constant.int 0
    // %3947 = [0, 0]: its elements (%int0_4151, %int0_4152) become the H/W
    // padding amounts below. %3948, %3949 and %3950 are not referenced in the
    // lines shown here; presumably the stride / dilation / output-padding
    // lists - TODO confirm.
    %3947 = torch.prim.ListConstruct %int0_4151, %int0_4152 : (!torch.int, !torch.int) -> !torch.list<int>
    %3948 = torch.prim.ListConstruct %int1_4153, %int1_4154 : (!torch.int, !torch.int) -> !torch.list<int>
    %3949 = torch.prim.ListConstruct %int1_4155, %int1_4156 : (!torch.int, !torch.int) -> !torch.list<int>
    %3950 = torch.prim.ListConstruct %int0_4157, %int0_4157 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4158 = torch.constant.bool false
    %int1_4159 = torch.constant.int 1
    // %3951 (= 1) is the group count checked by the asserts below.
    %3951 = torch_c.to_i64 %int1_4159
    %3952 = torch_c.to_i64 %int0_4151
    %3953 = torch_c.to_i64 %int0_4152
    %3954 = torch_c.to_i64 %int0_4157
    %3955 = torch_c.to_i64 %int0_4157
    %c0_4160 = arith.constant 0 : index
    %c1_4161 = arith.constant 1 : index
    %c1_4162 = arith.constant 1 : index
    %c256_4163 = arith.constant 256 : index
    %c2_4164 = arith.constant 2 : index
    %c14_4165 = arith.constant 14 : index
    %c3_4166 = arith.constant 3 : index
    %c14_4167 = arith.constant 14 : index
    %c0_4168 = arith.constant 0 : index
    %c1024_4169 = arith.constant 1024 : index
    %c1_4170 = arith.constant 1 : index
    %c256_4171 = arith.constant 256 : index
    %c2_4172 = arith.constant 2 : index
    %c1_4173 = arith.constant 1 : index
    %c3_4174 = arith.constant 3 : index
    %c1_4175 = arith.constant 1 : index
    // Runtime checks: 256 (input channels) and 1024 (filter leading dimension)
    // must both be divisible by the group count.
    %3956 = arith.index_cast %3951 : i64 to index
    %c0_4176 = arith.constant 0 : index
    %3957 = arith.remsi %c256_4163, %3956 : index
    %3958 = arith.cmpi eq, %c0_4176, %3957 : index
    cf.assert %3958, "invalid: groups must divide input channel size evenly."
    %c0_4177 = arith.constant 0 : index
    %3959 = arith.remsi %c1024_4169, %3956 : index
    %3960 = arith.cmpi eq, %c0_4177, %3959 : index
    cf.assert %3960, "invalid: groups must divide weight batch size evenly."
    // In the size computation below, %c1_i64_4178/_4179 multiply (kernel - 1)
    // and %c1_i64_4180/_4181 are the floordiv divisors.
    %c1_i64_4178 = arith.constant 1 : i64
    %c1_i64_4179 = arith.constant 1 : i64
    %c1_i64_4180 = arith.constant 1 : i64
    %c1_i64_4181 = arith.constant 1 : i64
    %cst_4182 = arith.constant 0.000000e+00 : f32
    %c0_4183 = arith.constant 0 : index
    %c1_4184 = arith.constant 1 : index
    %c1_4185 = arith.constant 1 : index
    %c256_4186 = arith.constant 256 : index
    %c2_4187 = arith.constant 2 : index
    %c14_4188 = arith.constant 14 : index
    %c3_4189 = arith.constant 3 : index
    %c14_4190 = arith.constant 14 : index
    %c0_i64_4191 = arith.constant 0 : i64
    // All pad amounts are 0 here, so tensor.pad adds no elements; it only
    // changes the static type to a fully dynamic one.
    %3961 = arith.index_cast %c0_i64_4191 : i64 to index
    %3962 = arith.index_cast %c0_i64_4191 : i64 to index
    %3963 = arith.index_cast %3952 : i64 to index
    %3964 = arith.index_cast %3953 : i64 to index
    %padded_4192 = tensor.pad %cast_4124 low[%3961, %3962, %3963, %3964] high[%3961, %3962, %3963, %3964] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4182 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output height: floordiv(14 + 2*0 - 1*(1 - 1) - 1, 1) + 1, which
    // evaluates to 14 for these constants.
    %3965 = arith.index_cast %c1_4173 : index to i64
    %c1_i64_4193 = arith.constant 1 : i64
    %c2_i64_4194 = arith.constant 2 : i64
    %3966 = arith.muli %3952, %c2_i64_4194 : i64
    %3967 = arith.index_cast %c14_4165 : index to i64
    %3968 = arith.addi %3967, %3966 : i64
    %3969 = arith.subi %3965, %c1_i64_4193 : i64
    %3970 = arith.muli %c1_i64_4178, %3969 : i64
    %3971 = arith.subi %3968, %3970 : i64
    %3972 = arith.subi %3971, %c1_i64_4193 : i64
    %3973 = arith.floordivsi %3972, %c1_i64_4180 : i64
    %3974 = arith.addi %3973, %c1_i64_4193 : i64
    %3975 = arith.index_cast %3974 : i64 to index
    // Output width: same formula applied to the last dimension.
    %3976 = arith.index_cast %c1_4175 : index to i64
    %c1_i64_4195 = arith.constant 1 : i64
    %c2_i64_4196 = arith.constant 2 : i64
    %3977 = arith.muli %3953, %c2_i64_4196 : i64
    %3978 = arith.index_cast %c14_4167 : index to i64
    %3979 = arith.addi %3978, %3977 : i64
    %3980 = arith.subi %3976, %c1_i64_4195 : i64
    %3981 = arith.muli %c1_i64_4179, %3980 : i64
    %3982 = arith.subi %3979, %3981 : i64
    %3983 = arith.subi %3982, %c1_i64_4195 : i64
    %3984 = arith.floordivsi %3983, %c1_i64_4181 : i64
    %3985 = arith.addi %3984, %c1_i64_4195 : i64
    %3986 = arith.index_cast %3985 : i64 to index
    // Broadcast the 1024-element bias along dim 1 (#map3) to initialise the
    // tensor that the convolution writes into.
    %3987 = tensor.empty(%3975, %3986) : tensor<1x1024x?x?xf32>
    %3988 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4150 : tensor<1024xf32>) outs(%3987 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // %3989 and %3990 (channels floordiv groups) are not referenced in the
    // lines shown here.
    %3989 = arith.floordivsi %c256_4163, %3956 : index
    %3990 = arith.floordivsi %c1024_4169, %3956 : index
    %c0_4197 = arith.constant 0 : index
    %c1_4198 = arith.constant 1 : index
    // The conv result keeps dynamic H/W; the tensor.cast pins them to 14x14.
    %3991 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4192, %cast_4139 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%3988 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    %cast_4199 = tensor.cast %3991 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 1x1024x14x14 f32 convolution output %cast_4199 to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    // with scale = 7.8125e-03 (1/128) and zero_point = 0.
    // NOTE(review): math.round rounds halfway cases away from zero; if this stage
    // originates from ONNX QuantizeLinear, the spec asks for round-half-to-even,
    // so math.roundeven may be the intended op - confirm against the lowering.
    %3992 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %3993 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the lines shown here; presumably a torch dtype code
    // (12 would be qint8) - TODO confirm.
    %int12_4200 = torch.constant.int 12
    %3994 = torch.aten.item %3992 : !torch.vtensor<[],f32> -> !torch.float
    %3995 = torch_c.to_f64 %3994
    %3996 = torch.aten.item %3993 : !torch.vtensor<[],si8> -> !torch.int
    %3997 = torch_c.to_i64 %3996
    %c1_4201 = arith.constant 1 : index
    %c1_4202 = arith.constant 1 : index
    %c1024_4203 = arith.constant 1024 : index
    %c2_4204 = arith.constant 2 : index
    %c14_4205 = arith.constant 14 : index
    %c3_4206 = arith.constant 3 : index
    %c14_4207 = arith.constant 14 : index
    %3998 = tensor.empty() : tensor<1x1024x14x14xi8>
    %3999 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4199 : tensor<1x1024x14x14xf32>) outs(%3998 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %3996
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %3994
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range. ult/ugt are unordered predicates, so a
      // NaN operand also satisfies them.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    %cast_4208 = tensor.cast %3999 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4209 = tensor.cast %cast_4208 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize the 1x1024x14x14 i8 tensor %cast_4209 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 7.8125e-03 and zero_point = 0. The result, %cast_4218, is the
    // first operand of the elementwise addition that follows.
    %4000 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4001 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4002 = torch.aten.item %4000 : !torch.vtensor<[],f32> -> !torch.float
    %4003 = torch_c.to_f64 %4002
    %4004 = torch.aten.item %4001 : !torch.vtensor<[],si8> -> !torch.int
    %4005 = torch_c.to_i64 %4004
    %cast_4210 = tensor.cast %cast_4209 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_4211 = arith.constant 1 : index
    %c1_4212 = arith.constant 1 : index
    %c1024_4213 = arith.constant 1024 : index
    %c2_4214 = arith.constant 2 : index
    %c14_4215 = arith.constant 14 : index
    %c3_4216 = arith.constant 3 : index
    %c14_4217 = arith.constant 14 : index
    %4006 = tensor.empty() : tensor<1x1024x14x14xf32>
    %4007 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4210 : tensor<1x1024x14x14xi8>) outs(%4006 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4004
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4002
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4218 = tensor.cast %4007 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Elementwise addition of %cast_4218 and %cast_3912 (both 1x1024x14x14 f32;
    // %cast_3912 is defined above this excerpt):
    //   out = in + in_6197 * f32(%4008), where %4008 is the integer constant 1.
    // Presumably the residual (skip-connection) add of the block - TODO confirm.
    %int1_4219 = torch.constant.int 1
    %4008 = torch_c.to_i64 %int1_4219
    %c1_4220 = arith.constant 1 : index
    %c1_4221 = arith.constant 1 : index
    %c1024_4222 = arith.constant 1024 : index
    %c2_4223 = arith.constant 2 : index
    %c14_4224 = arith.constant 14 : index
    %c3_4225 = arith.constant 3 : index
    %c14_4226 = arith.constant 14 : index
    %c1_4227 = arith.constant 1 : index
    %c1024_4228 = arith.constant 1024 : index
    // Broadcast-compatibility checks for dims 1, 2 and 3; each one compares two
    // index constants (1024 vs 1024, 14 vs 14, 14 vs 14).
    %4009 = arith.cmpi eq, %c1024_4222, %c1024_4228 : index
    cf.assert %4009, "mismatched size for broadcast"
    %c2_4229 = arith.constant 2 : index
    %c14_4230 = arith.constant 14 : index
    %4010 = arith.cmpi eq, %c14_4224, %c14_4230 : index
    cf.assert %4010, "mismatched size for broadcast"
    %c3_4231 = arith.constant 3 : index
    %c14_4232 = arith.constant 14 : index
    %4011 = arith.cmpi eq, %c14_4226, %c14_4232 : index
    cf.assert %4011, "mismatched size for broadcast"
    %4012 = tensor.empty() : tensor<1x1024x14x14xf32>
    %4013 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4218, %cast_3912 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4012 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by the integer multiplier, then add.
      %5774 = arith.sitofp %4008 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4233 = tensor.cast %4013 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Elementwise ReLU on the sum %cast_4233:
    //   out = (in ugt 0.0) ? in : 0.0
    // The index constants below are not referenced in the lines shown here.
    %c1_4234 = arith.constant 1 : index
    %c1_4235 = arith.constant 1 : index
    %c1024_4236 = arith.constant 1024 : index
    %c2_4237 = arith.constant 2 : index
    %c14_4238 = arith.constant 14 : index
    %c3_4239 = arith.constant 3 : index
    %c14_4240 = arith.constant 14 : index
    %4014 = tensor.empty() : tensor<1x1024x14x14xf32>
    %4015 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4233 : tensor<1x1024x14x14xf32>) outs(%4014 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered predicate, so a NaN input is yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4241 = tensor.cast %4015 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 1x1024x14x14 f32 activation %cast_4241 to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128, 127))
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    // NOTE(review): math.round rounds halfway cases away from zero; if this stage
    // originates from ONNX QuantizeLinear, the spec asks for round-half-to-even,
    // so math.roundeven may be the intended op - confirm against the lowering.
    %4016 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4017 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced in the lines shown here; presumably a torch dtype code
    // (12 would be qint8) - TODO confirm.
    %int12_4242 = torch.constant.int 12
    %4018 = torch.aten.item %4016 : !torch.vtensor<[],f32> -> !torch.float
    %4019 = torch_c.to_f64 %4018
    %4020 = torch.aten.item %4017 : !torch.vtensor<[],si8> -> !torch.int
    %4021 = torch_c.to_i64 %4020
    %c1_4243 = arith.constant 1 : index
    %c1_4244 = arith.constant 1 : index
    %c1024_4245 = arith.constant 1024 : index
    %c2_4246 = arith.constant 2 : index
    %c14_4247 = arith.constant 14 : index
    %c3_4248 = arith.constant 3 : index
    %c14_4249 = arith.constant 14 : index
    %4022 = tensor.empty() : tensor<1x1024x14x14xi8>
    %4023 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4241 : tensor<1x1024x14x14xf32>) outs(%4022 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4020
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4018
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to the signed 8-bit range. ult/ugt are unordered predicates, so a
      // NaN operand also satisfies them.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    %cast_4250 = tensor.cast %4023 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4251 = tensor.cast %cast_4250 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize the 1x1024x14x14 i8 tensor %cast_4251 back to f32:
    //   out = f32(sext(in) - zero_point) * scale
    // with scale = 1.5625e-02 and zero_point = 0. The result is %cast_4260; its
    // consumers lie below this excerpt.
    %4024 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4025 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4026 = torch.aten.item %4024 : !torch.vtensor<[],f32> -> !torch.float
    %4027 = torch_c.to_f64 %4026
    %4028 = torch.aten.item %4025 : !torch.vtensor<[],si8> -> !torch.int
    %4029 = torch_c.to_i64 %4028
    %cast_4252 = tensor.cast %cast_4251 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_4253 = arith.constant 1 : index
    %c1_4254 = arith.constant 1 : index
    %c1024_4255 = arith.constant 1024 : index
    %c2_4256 = arith.constant 2 : index
    %c14_4257 = arith.constant 14 : index
    %c3_4258 = arith.constant 3 : index
    %c14_4259 = arith.constant 14 : index
    %4030 = tensor.empty() : tensor<1x1024x14x14xf32>
    %4031 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4252 : tensor<1x1024x14x14xi8>) outs(%4030 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32, subtract the zero point, convert to f32, multiply by scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4028
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4026
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4260 = tensor.cast %4031 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the f32 tensor %150 (256x1024x1x1, defined earlier in the
    // function) to i8 with scale = 0.001953125 and zero point = 0.
    %4032 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4033 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4261 = torch.constant.int 12
    %4034 = torch.aten.item %4032 : !torch.vtensor<[],f32> -> !torch.float
    %4035 = torch_c.to_f64 %4034
    %4036 = torch.aten.item %4033 : !torch.vtensor<[],si8> -> !torch.int
    %4037 = torch_c.to_i64 %4036
    // The index constants below are not referenced by the ops of this stage.
    %c1_4262 = arith.constant 1 : index
    %c0_4263 = arith.constant 0 : index
    %c256_4264 = arith.constant 256 : index
    %c1_4265 = arith.constant 1 : index
    %c1024_4266 = arith.constant 1024 : index
    %4038 = tensor.empty() : tensor<256x1024x1x1xi8>
    %4039 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%150 : tensor<256x1024x1x1xf32>) outs(%4038 : tensor<256x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4036
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4034
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x1024x1x1xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4267 = tensor.cast %4039 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %cast_4268 = tensor.cast %cast_4267 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Dequantize %cast_4268 (i8, 256x1024x1x1) back to f32 using
    // scale = 0.001953125 and zero point = 0: out = (in - zero_point) * scale.
    %4040 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4041 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4042 = torch.aten.item %4040 : !torch.vtensor<[],f32> -> !torch.float
    %4043 = torch_c.to_f64 %4042
    %4044 = torch.aten.item %4041 : !torch.vtensor<[],si8> -> !torch.int
    %4045 = torch_c.to_i64 %4044
    %cast_4269 = tensor.cast %cast_4268 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4270 = arith.constant 1 : index
    %c0_4271 = arith.constant 0 : index
    %c256_4272 = arith.constant 256 : index
    %c1_4273 = arith.constant 1 : index
    %c1024_4274 = arith.constant 1024 : index
    %4046 = tensor.empty() : tensor<256x1024x1x1xf32>
    %4047 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4269 : tensor<256x1024x1x1xi8>) outs(%4046 : tensor<256x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4044
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4042
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x1024x1x1xf32>
    %cast_4275 = tensor.cast %4047 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
    // Quantize the f32 vector %152 (256 elements, defined earlier in the
    // function) to i8 with scale = 3.90625e-03 and zero point = 0.
    %4048 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4049 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4276 = torch.constant.int 12
    %4050 = torch.aten.item %4048 : !torch.vtensor<[],f32> -> !torch.float
    %4051 = torch_c.to_f64 %4050
    %4052 = torch.aten.item %4049 : !torch.vtensor<[],si8> -> !torch.int
    %4053 = torch_c.to_i64 %4052
    // The index constants below are not referenced by the ops of this stage.
    %c1_4277 = arith.constant 1 : index
    %c0_4278 = arith.constant 0 : index
    %c256_4279 = arith.constant 256 : index
    %4054 = tensor.empty() : tensor<256xi8>
    %4055 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%152 : tensor<256xf32>) outs(%4054 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4052
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4050
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4280 = tensor.cast %4055 : tensor<256xi8> to tensor<256xi8>
    %cast_4281 = tensor.cast %cast_4280 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_4281 (i8, 256 elements) back to f32 using
    // scale = 3.90625e-03 and zero point = 0: out = (in - zero_point) * scale.
    %4056 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4057 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4058 = torch.aten.item %4056 : !torch.vtensor<[],f32> -> !torch.float
    %4059 = torch_c.to_f64 %4058
    %4060 = torch.aten.item %4057 : !torch.vtensor<[],si8> -> !torch.int
    %4061 = torch_c.to_i64 %4060
    %cast_4282 = tensor.cast %cast_4281 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4283 = arith.constant 1 : index
    %c0_4284 = arith.constant 0 : index
    %c256_4285 = arith.constant 256 : index
    %4062 = tensor.empty() : tensor<256xf32>
    %4063 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4282 : tensor<256xi8>) outs(%4062 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4060
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4058
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_4286 = tensor.cast %4063 : tensor<256xf32> to tensor<256xf32>
    // 1x1 convolution: input %cast_4260 (1x1024x14x14), filter %cast_4275
    // (256x1024x1x1), bias %cast_4286 (256); result cast to 1x256x14x14.
    %int0_4287 = torch.constant.int 0
    %int0_4288 = torch.constant.int 0
    %int1_4289 = torch.constant.int 1
    %int1_4290 = torch.constant.int 1
    %int1_4291 = torch.constant.int 1
    %int1_4292 = torch.constant.int 1
    %int0_4293 = torch.constant.int 0
    // The four lists below are not referenced by the ops of this stage.
    %4064 = torch.prim.ListConstruct %int0_4287, %int0_4288 : (!torch.int, !torch.int) -> !torch.list<int>
    %4065 = torch.prim.ListConstruct %int1_4289, %int1_4290 : (!torch.int, !torch.int) -> !torch.list<int>
    %4066 = torch.prim.ListConstruct %int1_4291, %int1_4292 : (!torch.int, !torch.int) -> !torch.list<int>
    %4067 = torch.prim.ListConstruct %int0_4293, %int0_4293 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4294 = torch.constant.bool false
    %int1_4295 = torch.constant.int 1
    // %4068 is the group count (1); %4069/%4070 are the spatial padding (0, 0).
    %4068 = torch_c.to_i64 %int1_4295
    %4069 = torch_c.to_i64 %int0_4287
    %4070 = torch_c.to_i64 %int0_4288
    %4071 = torch_c.to_i64 %int0_4293
    %4072 = torch_c.to_i64 %int0_4293
    %c0_4296 = arith.constant 0 : index
    %c1_4297 = arith.constant 1 : index
    %c1_4298 = arith.constant 1 : index
    %c1024_4299 = arith.constant 1024 : index
    %c2_4300 = arith.constant 2 : index
    %c14_4301 = arith.constant 14 : index
    %c3_4302 = arith.constant 3 : index
    %c14_4303 = arith.constant 14 : index
    %c0_4304 = arith.constant 0 : index
    %c256_4305 = arith.constant 256 : index
    %c1_4306 = arith.constant 1 : index
    %c1024_4307 = arith.constant 1024 : index
    %c2_4308 = arith.constant 2 : index
    %c1_4309 = arith.constant 1 : index
    %c3_4310 = arith.constant 3 : index
    %c1_4311 = arith.constant 1 : index
    // Runtime checks that the group count divides 1024 and 256.
    %4073 = arith.index_cast %4068 : i64 to index
    %c0_4312 = arith.constant 0 : index
    %4074 = arith.remsi %c1024_4299, %4073 : index
    %4075 = arith.cmpi eq, %c0_4312, %4074 : index
    cf.assert %4075, "invalid: groups must divide input channel size evenly."
    %c0_4313 = arith.constant 0 : index
    %4076 = arith.remsi %c256_4305, %4073 : index
    %4077 = arith.cmpi eq, %c0_4313, %4076 : index
    cf.assert %4077, "invalid: groups must divide weight batch size evenly."
    // Four i64 ones used as factors in the output-size arithmetic below
    // (presumably dilation and stride per spatial dim - TODO confirm).
    %c1_i64_4314 = arith.constant 1 : i64
    %c1_i64_4315 = arith.constant 1 : i64
    %c1_i64_4316 = arith.constant 1 : i64
    %c1_i64_4317 = arith.constant 1 : i64
    // Padding value.
    %cst_4318 = arith.constant 0.000000e+00 : f32
    %c0_4319 = arith.constant 0 : index
    %c1_4320 = arith.constant 1 : index
    %c1_4321 = arith.constant 1 : index
    %c1024_4322 = arith.constant 1024 : index
    %c2_4323 = arith.constant 2 : index
    %c14_4324 = arith.constant 14 : index
    %c3_4325 = arith.constant 3 : index
    %c14_4326 = arith.constant 14 : index
    %c0_i64_4327 = arith.constant 0 : i64
    // Pad amounts per dimension: 0 for the two leading dims, %4069/%4070 for
    // the two trailing dims, applied symmetrically (low and high).
    %4078 = arith.index_cast %c0_i64_4327 : i64 to index
    %4079 = arith.index_cast %c0_i64_4327 : i64 to index
    %4080 = arith.index_cast %4069 : i64 to index
    %4081 = arith.index_cast %4070 : i64 to index
    %padded_4328 = tensor.pad %cast_4260 low[%4078, %4079, %4080, %4081] high[%4078, %4079, %4080, %4081] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4318 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    // (14 + 2*pad - 1*(kernel - 1) - 1) floordiv 1 + 1, with kernel = 1.
    %4082 = arith.index_cast %c1_4309 : index to i64
    %c1_i64_4329 = arith.constant 1 : i64
    %c2_i64_4330 = arith.constant 2 : i64
    %4083 = arith.muli %4069, %c2_i64_4330 : i64
    %4084 = arith.index_cast %c14_4301 : index to i64
    %4085 = arith.addi %4084, %4083 : i64
    %4086 = arith.subi %4082, %c1_i64_4329 : i64
    %4087 = arith.muli %c1_i64_4314, %4086 : i64
    %4088 = arith.subi %4085, %4087 : i64
    %4089 = arith.subi %4088, %c1_i64_4329 : i64
    %4090 = arith.floordivsi %4089, %c1_i64_4316 : i64
    %4091 = arith.addi %4090, %c1_i64_4329 : i64
    %4092 = arith.index_cast %4091 : i64 to index
    // Same computation for dim 3.
    %4093 = arith.index_cast %c1_4311 : index to i64
    %c1_i64_4331 = arith.constant 1 : i64
    %c2_i64_4332 = arith.constant 2 : i64
    %4094 = arith.muli %4070, %c2_i64_4332 : i64
    %4095 = arith.index_cast %c14_4303 : index to i64
    %4096 = arith.addi %4095, %4094 : i64
    %4097 = arith.subi %4093, %c1_i64_4331 : i64
    %4098 = arith.muli %c1_i64_4315, %4097 : i64
    %4099 = arith.subi %4096, %4098 : i64
    %4100 = arith.subi %4099, %c1_i64_4331 : i64
    %4101 = arith.floordivsi %4100, %c1_i64_4317 : i64
    %4102 = arith.addi %4101, %c1_i64_4331 : i64
    %4103 = arith.index_cast %4102 : i64 to index
    // Broadcast the bias along dim 1 (#map3 selects d1) into the tensor that
    // is passed as the outs operand of the convolution.
    %4104 = tensor.empty(%4092, %4103) : tensor<1x256x?x?xf32>
    %4105 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4286 : tensor<256xf32>) outs(%4104 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %4106, %4107 and the two index constants are not referenced by the ops
    // of this stage.
    %4106 = arith.floordivsi %c1024_4299, %4073 : index
    %4107 = arith.floordivsi %c256_4305, %4073 : index
    %c0_4333 = arith.constant 0 : index
    %c1_4334 = arith.constant 1 : index
    %4108 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4328, %cast_4275 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%4105 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Restore the static result shape.
    %cast_4335 = tensor.cast %4108 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU-style elementwise select on %cast_4335 (1x256x14x14 f32):
    // keep %in where "%in ugt 0.0" holds, otherwise yield 0.0.
    // The index constants below are not referenced by the ops of this stage.
    %c1_4336 = arith.constant 1 : index
    %c1_4337 = arith.constant 1 : index
    %c256_4338 = arith.constant 256 : index
    %c2_4339 = arith.constant 2 : index
    %c14_4340 = arith.constant 14 : index
    %c3_4341 = arith.constant 3 : index
    %c14_4342 = arith.constant 14 : index
    %4109 = tensor.empty() : tensor<1x256x14x14xf32>
    %4110 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4335 : tensor<1x256x14x14xf32>) outs(%4109 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered compare, so it is also true when %in is NaN.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4343 = tensor.cast %4110 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %cast_4343 (f32, 1x256x14x14) to i8 with scale = 7.8125e-03 and
    // zero point = 0.
    %4111 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4112 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4344 = torch.constant.int 12
    %4113 = torch.aten.item %4111 : !torch.vtensor<[],f32> -> !torch.float
    %4114 = torch_c.to_f64 %4113
    %4115 = torch.aten.item %4112 : !torch.vtensor<[],si8> -> !torch.int
    %4116 = torch_c.to_i64 %4115
    // The index constants below are not referenced by the ops of this stage.
    %c1_4345 = arith.constant 1 : index
    %c1_4346 = arith.constant 1 : index
    %c256_4347 = arith.constant 256 : index
    %c2_4348 = arith.constant 2 : index
    %c14_4349 = arith.constant 14 : index
    %c3_4350 = arith.constant 3 : index
    %c14_4351 = arith.constant 14 : index
    %4117 = tensor.empty() : tensor<1x256x14x14xi8>
    %4118 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4343 : tensor<1x256x14x14xf32>) outs(%4117 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4115
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4113
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4352 = tensor.cast %4118 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4353 = tensor.cast %cast_4352 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_4353 (i8, 1x256x14x14) back to f32 using
    // scale = 7.8125e-03 and zero point = 0: out = (in - zero_point) * scale.
    %4119 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4120 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4121 = torch.aten.item %4119 : !torch.vtensor<[],f32> -> !torch.float
    %4122 = torch_c.to_f64 %4121
    %4123 = torch.aten.item %4120 : !torch.vtensor<[],si8> -> !torch.int
    %4124 = torch_c.to_i64 %4123
    %cast_4354 = tensor.cast %cast_4353 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4355 = arith.constant 1 : index
    %c1_4356 = arith.constant 1 : index
    %c256_4357 = arith.constant 256 : index
    %c2_4358 = arith.constant 2 : index
    %c14_4359 = arith.constant 14 : index
    %c3_4360 = arith.constant 3 : index
    %c14_4361 = arith.constant 14 : index
    %4125 = tensor.empty() : tensor<1x256x14x14xf32>
    %4126 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4354 : tensor<1x256x14x14xi8>) outs(%4125 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4123
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4121
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4362 = tensor.cast %4126 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the f32 tensor %154 (256x256x3x3, defined earlier in the
    // function) to i8 with scale = 3.90625e-03 and zero point = 0.
    %4127 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4128 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4363 = torch.constant.int 12
    %4129 = torch.aten.item %4127 : !torch.vtensor<[],f32> -> !torch.float
    %4130 = torch_c.to_f64 %4129
    %4131 = torch.aten.item %4128 : !torch.vtensor<[],si8> -> !torch.int
    %4132 = torch_c.to_i64 %4131
    // The index constants below are not referenced by the ops of this stage.
    %c1_4364 = arith.constant 1 : index
    %c0_4365 = arith.constant 0 : index
    %c256_4366 = arith.constant 256 : index
    %c1_4367 = arith.constant 1 : index
    %c256_4368 = arith.constant 256 : index
    %c2_4369 = arith.constant 2 : index
    %c3_4370 = arith.constant 3 : index
    %c3_4371 = arith.constant 3 : index
    %c3_4372 = arith.constant 3 : index
    %4133 = tensor.empty() : tensor<256x256x3x3xi8>
    %4134 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%154 : tensor<256x256x3x3xf32>) outs(%4133 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4131
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4129
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4373 = tensor.cast %4134 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_4374 = tensor.cast %cast_4373 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize %cast_4374 (i8, 256x256x3x3) back to f32 using
    // scale = 3.90625e-03 and zero point = 0: out = (in - zero_point) * scale.
    %4135 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4136 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4137 = torch.aten.item %4135 : !torch.vtensor<[],f32> -> !torch.float
    %4138 = torch_c.to_f64 %4137
    %4139 = torch.aten.item %4136 : !torch.vtensor<[],si8> -> !torch.int
    %4140 = torch_c.to_i64 %4139
    %cast_4375 = tensor.cast %cast_4374 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4376 = arith.constant 1 : index
    %c0_4377 = arith.constant 0 : index
    %c256_4378 = arith.constant 256 : index
    %c1_4379 = arith.constant 1 : index
    %c256_4380 = arith.constant 256 : index
    %c2_4381 = arith.constant 2 : index
    %c3_4382 = arith.constant 3 : index
    %c3_4383 = arith.constant 3 : index
    %c3_4384 = arith.constant 3 : index
    %4141 = tensor.empty() : tensor<256x256x3x3xf32>
    %4142 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4375 : tensor<256x256x3x3xi8>) outs(%4141 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4139
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4137
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    %cast_4385 = tensor.cast %4142 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize the f32 vector %156 (256 elements, defined earlier in the
    // function) to i8 with scale = 7.8125e-03 and zero point = 0.
    %4143 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4144 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4386 = torch.constant.int 12
    %4145 = torch.aten.item %4143 : !torch.vtensor<[],f32> -> !torch.float
    %4146 = torch_c.to_f64 %4145
    %4147 = torch.aten.item %4144 : !torch.vtensor<[],si8> -> !torch.int
    %4148 = torch_c.to_i64 %4147
    // The index constants below are not referenced by the ops of this stage.
    %c1_4387 = arith.constant 1 : index
    %c0_4388 = arith.constant 0 : index
    %c256_4389 = arith.constant 256 : index
    %4149 = tensor.empty() : tensor<256xi8>
    %4150 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%156 : tensor<256xf32>) outs(%4149 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4147
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4145
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4390 = tensor.cast %4150 : tensor<256xi8> to tensor<256xi8>
    %cast_4391 = tensor.cast %cast_4390 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_4391 (i8, 256 elements) back to f32 using
    // scale = 7.8125e-03 and zero point = 0: out = (in - zero_point) * scale.
    %4151 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4152 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4153 = torch.aten.item %4151 : !torch.vtensor<[],f32> -> !torch.float
    %4154 = torch_c.to_f64 %4153
    %4155 = torch.aten.item %4152 : !torch.vtensor<[],si8> -> !torch.int
    %4156 = torch_c.to_i64 %4155
    %cast_4392 = tensor.cast %cast_4391 : tensor<256xi8> to tensor<256xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4393 = arith.constant 1 : index
    %c0_4394 = arith.constant 0 : index
    %c256_4395 = arith.constant 256 : index
    %4157 = tensor.empty() : tensor<256xf32>
    %4158 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4392 : tensor<256xi8>) outs(%4157 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4155
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4153
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_4396 = tensor.cast %4158 : tensor<256xf32> to tensor<256xf32>
    // 3x3 convolution: input %cast_4362 (1x256x14x14), filter %cast_4385
    // (256x256x3x3), bias %cast_4396 (256); result cast to 1x256x14x14.
    %int1_4397 = torch.constant.int 1
    %int1_4398 = torch.constant.int 1
    %int1_4399 = torch.constant.int 1
    %int1_4400 = torch.constant.int 1
    %int1_4401 = torch.constant.int 1
    %int1_4402 = torch.constant.int 1
    %int0_4403 = torch.constant.int 0
    // The four lists below are not referenced by the ops of this stage.
    %4159 = torch.prim.ListConstruct %int1_4397, %int1_4398 : (!torch.int, !torch.int) -> !torch.list<int>
    %4160 = torch.prim.ListConstruct %int1_4399, %int1_4400 : (!torch.int, !torch.int) -> !torch.list<int>
    %4161 = torch.prim.ListConstruct %int1_4401, %int1_4402 : (!torch.int, !torch.int) -> !torch.list<int>
    %4162 = torch.prim.ListConstruct %int0_4403, %int0_4403 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4404 = torch.constant.bool false
    %int1_4405 = torch.constant.int 1
    // %4163 is the group count (1); %4164/%4165 are the spatial padding (1, 1).
    %4163 = torch_c.to_i64 %int1_4405
    %4164 = torch_c.to_i64 %int1_4397
    %4165 = torch_c.to_i64 %int1_4398
    %4166 = torch_c.to_i64 %int0_4403
    %4167 = torch_c.to_i64 %int0_4403
    %c0_4406 = arith.constant 0 : index
    %c1_4407 = arith.constant 1 : index
    %c1_4408 = arith.constant 1 : index
    %c256_4409 = arith.constant 256 : index
    %c2_4410 = arith.constant 2 : index
    %c14_4411 = arith.constant 14 : index
    %c3_4412 = arith.constant 3 : index
    %c14_4413 = arith.constant 14 : index
    %c0_4414 = arith.constant 0 : index
    %c256_4415 = arith.constant 256 : index
    %c1_4416 = arith.constant 1 : index
    %c256_4417 = arith.constant 256 : index
    %c2_4418 = arith.constant 2 : index
    %c3_4419 = arith.constant 3 : index
    %c3_4420 = arith.constant 3 : index
    %c3_4421 = arith.constant 3 : index
    // Runtime checks that the group count divides 256 (both operands).
    %4168 = arith.index_cast %4163 : i64 to index
    %c0_4422 = arith.constant 0 : index
    %4169 = arith.remsi %c256_4409, %4168 : index
    %4170 = arith.cmpi eq, %c0_4422, %4169 : index
    cf.assert %4170, "invalid: groups must divide input channel size evenly."
    %c0_4423 = arith.constant 0 : index
    %4171 = arith.remsi %c256_4415, %4168 : index
    %4172 = arith.cmpi eq, %c0_4423, %4171 : index
    cf.assert %4172, "invalid: groups must divide weight batch size evenly."
    // Four i64 ones used as factors in the output-size arithmetic below
    // (presumably dilation and stride per spatial dim - TODO confirm).
    %c1_i64_4424 = arith.constant 1 : i64
    %c1_i64_4425 = arith.constant 1 : i64
    %c1_i64_4426 = arith.constant 1 : i64
    %c1_i64_4427 = arith.constant 1 : i64
    // Padding value.
    %cst_4428 = arith.constant 0.000000e+00 : f32
    %c0_4429 = arith.constant 0 : index
    %c1_4430 = arith.constant 1 : index
    %c1_4431 = arith.constant 1 : index
    %c256_4432 = arith.constant 256 : index
    %c2_4433 = arith.constant 2 : index
    %c14_4434 = arith.constant 14 : index
    %c3_4435 = arith.constant 3 : index
    %c14_4436 = arith.constant 14 : index
    %c0_i64_4437 = arith.constant 0 : i64
    // Pad amounts per dimension: 0 for the two leading dims, %4164/%4165 for
    // the two trailing dims, applied symmetrically (low and high).
    %4173 = arith.index_cast %c0_i64_4437 : i64 to index
    %4174 = arith.index_cast %c0_i64_4437 : i64 to index
    %4175 = arith.index_cast %4164 : i64 to index
    %4176 = arith.index_cast %4165 : i64 to index
    %padded_4438 = tensor.pad %cast_4362 low[%4173, %4174, %4175, %4176] high[%4173, %4174, %4175, %4176] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4428 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    // (14 + 2*pad - 1*(kernel - 1) - 1) floordiv 1 + 1, with kernel = 3.
    %4177 = arith.index_cast %c3_4419 : index to i64
    %c1_i64_4439 = arith.constant 1 : i64
    %c2_i64_4440 = arith.constant 2 : i64
    %4178 = arith.muli %4164, %c2_i64_4440 : i64
    %4179 = arith.index_cast %c14_4411 : index to i64
    %4180 = arith.addi %4179, %4178 : i64
    %4181 = arith.subi %4177, %c1_i64_4439 : i64
    %4182 = arith.muli %c1_i64_4424, %4181 : i64
    %4183 = arith.subi %4180, %4182 : i64
    %4184 = arith.subi %4183, %c1_i64_4439 : i64
    %4185 = arith.floordivsi %4184, %c1_i64_4426 : i64
    %4186 = arith.addi %4185, %c1_i64_4439 : i64
    %4187 = arith.index_cast %4186 : i64 to index
    // Same computation for dim 3.
    %4188 = arith.index_cast %c3_4421 : index to i64
    %c1_i64_4441 = arith.constant 1 : i64
    %c2_i64_4442 = arith.constant 2 : i64
    %4189 = arith.muli %4165, %c2_i64_4442 : i64
    %4190 = arith.index_cast %c14_4413 : index to i64
    %4191 = arith.addi %4190, %4189 : i64
    %4192 = arith.subi %4188, %c1_i64_4441 : i64
    %4193 = arith.muli %c1_i64_4425, %4192 : i64
    %4194 = arith.subi %4191, %4193 : i64
    %4195 = arith.subi %4194, %c1_i64_4441 : i64
    %4196 = arith.floordivsi %4195, %c1_i64_4427 : i64
    %4197 = arith.addi %4196, %c1_i64_4441 : i64
    %4198 = arith.index_cast %4197 : i64 to index
    // Broadcast the bias along dim 1 (#map3 selects d1) into the tensor that
    // is passed as the outs operand of the convolution.
    %4199 = tensor.empty(%4187, %4198) : tensor<1x256x?x?xf32>
    %4200 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4396 : tensor<256xf32>) outs(%4199 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %4201, %4202 and the two index constants are not referenced by the ops
    // of this stage.
    %4201 = arith.floordivsi %c256_4409, %4168 : index
    %4202 = arith.floordivsi %c256_4415, %4168 : index
    %c0_4443 = arith.constant 0 : index
    %c1_4444 = arith.constant 1 : index
    %4203 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4438, %cast_4385 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%4200 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Restore the static result shape.
    %cast_4445 = tensor.cast %4203 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // ReLU-style elementwise select on %cast_4445 (1x256x14x14 f32):
    // keep %in where "%in ugt 0.0" holds, otherwise yield 0.0.
    // The index constants below are not referenced by the ops of this stage.
    %c1_4446 = arith.constant 1 : index
    %c1_4447 = arith.constant 1 : index
    %c256_4448 = arith.constant 256 : index
    %c2_4449 = arith.constant 2 : index
    %c14_4450 = arith.constant 14 : index
    %c3_4451 = arith.constant 3 : index
    %c14_4452 = arith.constant 14 : index
    %4204 = tensor.empty() : tensor<1x256x14x14xf32>
    %4205 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4445 : tensor<1x256x14x14xf32>) outs(%4204 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // ugt is an unordered compare, so it is also true when %in is NaN.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4453 = tensor.cast %4205 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %cast_4453 (f32, 1x256x14x14) to i8 with scale = 7.8125e-03 and
    // zero point = 0.
    %4206 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4207 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Not referenced by the ops of this stage; presumably the dtype operand of
    // the torch-level quantize op this was lowered from - TODO confirm.
    %int12_4454 = torch.constant.int 12
    %4208 = torch.aten.item %4206 : !torch.vtensor<[],f32> -> !torch.float
    %4209 = torch_c.to_f64 %4208
    %4210 = torch.aten.item %4207 : !torch.vtensor<[],si8> -> !torch.int
    %4211 = torch_c.to_i64 %4210
    // The index constants below are not referenced by the ops of this stage.
    %c1_4455 = arith.constant 1 : index
    %c1_4456 = arith.constant 1 : index
    %c256_4457 = arith.constant 256 : index
    %c2_4458 = arith.constant 2 : index
    %c14_4459 = arith.constant 14 : index
    %c3_4460 = arith.constant 3 : index
    %c14_4461 = arith.constant 14 : index
    %4212 = tensor.empty() : tensor<1x256x14x14xi8>
    %4213 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4453 : tensor<1x256x14x14xf32>) outs(%4212 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale as f32 scalars.
      %5774 = torch_c.to_i64 %4210
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4208
      %5777 = arith.truncf %5776 : f64 to f32
      // q = roundeven(in / scale) + zero_point. Ties are rounded to even, the
      // rule ONNX QuantizeLinear and torch.quantize_per_tensor specify;
      // math.round would round ties away from zero instead.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the conversion to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Same-type casts; they do not change the tensor type.
    %cast_4462 = tensor.cast %4213 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4463 = tensor.cast %cast_4462 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_4463 (i8, 1x256x14x14) back to f32 using
    // scale = 7.8125e-03 and zero point = 0: out = (in - zero_point) * scale.
    %4214 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4215 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4216 = torch.aten.item %4214 : !torch.vtensor<[],f32> -> !torch.float
    %4217 = torch_c.to_f64 %4216
    %4218 = torch.aten.item %4215 : !torch.vtensor<[],si8> -> !torch.int
    %4219 = torch_c.to_i64 %4218
    %cast_4464 = tensor.cast %cast_4463 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // The index constants below are not referenced by the ops of this stage.
    %c1_4465 = arith.constant 1 : index
    %c1_4466 = arith.constant 1 : index
    %c256_4467 = arith.constant 256 : index
    %c2_4468 = arith.constant 2 : index
    %c14_4469 = arith.constant 14 : index
    %c3_4470 = arith.constant 3 : index
    %c14_4471 = arith.constant 14 : index
    %4220 = tensor.empty() : tensor<1x256x14x14xf32>
    %4221 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4464 : tensor<1x256x14x14xi8>) outs(%4220 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Subtract the zero point in i32, then scale in f32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4218
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4216
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    %cast_4472 = tensor.cast %4221 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %158 (f32, 1024x256x1x1; defined before this excerpt) to i8:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4222 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4223 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4473 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4473 = torch.constant.int 12
    %4224 = torch.aten.item %4222 : !torch.vtensor<[],f32> -> !torch.float
    %4225 = torch_c.to_f64 %4224
    %4226 = torch.aten.item %4223 : !torch.vtensor<[],si8> -> !torch.int
    %4227 = torch_c.to_i64 %4226
    // No use of the following index constants is visible in this excerpt.
    %c1_4474 = arith.constant 1 : index
    %c0_4475 = arith.constant 0 : index
    %c1024_4476 = arith.constant 1024 : index
    %c1_4477 = arith.constant 1 : index
    %c256_4478 = arith.constant 256 : index
    %4228 = tensor.empty() : tensor<1024x256x1x1xi8>
    // #map4 reads the input with dims 2 and 3 pinned to index 0 (both are size 1).
    %4229 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%158 : tensor<1024x256x1x1xf32>) outs(%4228 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4226
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4224
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4479 = tensor.cast %4229 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_4480 = tensor.cast %cast_4479 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize %cast_4480 (i8, 1024x256x1x1) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    %4230 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4231 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4232 = torch.aten.item %4230 : !torch.vtensor<[],f32> -> !torch.float
    %4233 = torch_c.to_f64 %4232
    %4234 = torch.aten.item %4231 : !torch.vtensor<[],si8> -> !torch.int
    %4235 = torch_c.to_i64 %4234
    // Cast to the identical static type (no type change).
    %cast_4481 = tensor.cast %cast_4480 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4482 = arith.constant 1 : index
    %c0_4483 = arith.constant 0 : index
    %c1024_4484 = arith.constant 1024 : index
    %c1_4485 = arith.constant 1 : index
    %c256_4486 = arith.constant 256 : index
    %4236 = tensor.empty() : tensor<1024x256x1x1xf32>
    // #map4 reads the input with dims 2 and 3 pinned to index 0 (both are size 1).
    %4237 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4481 : tensor<1024x256x1x1xi8>) outs(%4236 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4234
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4232
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    %cast_4487 = tensor.cast %4237 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize %160 (f32, 1024 elements; defined before this excerpt) to i8:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4238 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4239 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4488 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4488 = torch.constant.int 12
    %4240 = torch.aten.item %4238 : !torch.vtensor<[],f32> -> !torch.float
    %4241 = torch_c.to_f64 %4240
    %4242 = torch.aten.item %4239 : !torch.vtensor<[],si8> -> !torch.int
    %4243 = torch_c.to_i64 %4242
    // No use of the following index constants is visible in this excerpt.
    %c1_4489 = arith.constant 1 : index
    %c0_4490 = arith.constant 0 : index
    %c1024_4491 = arith.constant 1024 : index
    %4244 = tensor.empty() : tensor<1024xi8>
    // Rank-1 identity maps (#map2) on both input and output.
    %4245 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%160 : tensor<1024xf32>) outs(%4244 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4242
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4240
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4492 = tensor.cast %4245 : tensor<1024xi8> to tensor<1024xi8>
    %cast_4493 = tensor.cast %cast_4492 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize %cast_4493 (i8, 1024 elements) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    %4246 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4247 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4248 = torch.aten.item %4246 : !torch.vtensor<[],f32> -> !torch.float
    %4249 = torch_c.to_f64 %4248
    %4250 = torch.aten.item %4247 : !torch.vtensor<[],si8> -> !torch.int
    %4251 = torch_c.to_i64 %4250
    // Cast to the identical static type (no type change).
    %cast_4494 = tensor.cast %cast_4493 : tensor<1024xi8> to tensor<1024xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4495 = arith.constant 1 : index
    %c0_4496 = arith.constant 0 : index
    %c1024_4497 = arith.constant 1024 : index
    %4252 = tensor.empty() : tensor<1024xf32>
    // Rank-1 identity maps (#map2) on both input and output.
    %4253 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4494 : tensor<1024xi8>) outs(%4252 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4250
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4248
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    %cast_4498 = tensor.cast %4253 : tensor<1024xf32> to tensor<1024xf32>
    // 2-D convolution (NCHW input, FCHW filter):
    //   input  %cast_4472 : 1x256x14x14
    //   filter %cast_4487 : 1024x256x1x1
    //   bias   %cast_4498 : 1024
    // The result is cast to 1x1024x14x14 (%cast_4547).
    %int0_4499 = torch.constant.int 0
    %int0_4500 = torch.constant.int 0
    %int1_4501 = torch.constant.int 1
    %int1_4502 = torch.constant.int 1
    %int1_4503 = torch.constant.int 1
    %int1_4504 = torch.constant.int 1
    %int0_4505 = torch.constant.int 0
    // Presumably the padding / stride / dilation / output-padding lists of the
    // torch-level convolution; no consumer is visible in this excerpt -- TODO confirm.
    %4254 = torch.prim.ListConstruct %int0_4499, %int0_4500 : (!torch.int, !torch.int) -> !torch.list<int>
    %4255 = torch.prim.ListConstruct %int1_4501, %int1_4502 : (!torch.int, !torch.int) -> !torch.list<int>
    %4256 = torch.prim.ListConstruct %int1_4503, %int1_4504 : (!torch.int, !torch.int) -> !torch.list<int>
    %4257 = torch.prim.ListConstruct %int0_4505, %int0_4505 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4506 = torch.constant.bool false
    %int1_4507 = torch.constant.int 1
    // %4258 (constant 1) is the "groups" value checked by the asserts below;
    // %4259 / %4260 (constant 0) become the spatial padding amounts.
    %4258 = torch_c.to_i64 %int1_4507
    %4259 = torch_c.to_i64 %int0_4499
    %4260 = torch_c.to_i64 %int0_4500
    %4261 = torch_c.to_i64 %int0_4505
    %4262 = torch_c.to_i64 %int0_4505
    // Index constants; only %c256_4511, %c14_4513, %c14_4515, %c1024_4517,
    // %c1_4521 and %c1_4523 are used within this excerpt.
    %c0_4508 = arith.constant 0 : index
    %c1_4509 = arith.constant 1 : index
    %c1_4510 = arith.constant 1 : index
    %c256_4511 = arith.constant 256 : index
    %c2_4512 = arith.constant 2 : index
    %c14_4513 = arith.constant 14 : index
    %c3_4514 = arith.constant 3 : index
    %c14_4515 = arith.constant 14 : index
    %c0_4516 = arith.constant 0 : index
    %c1024_4517 = arith.constant 1024 : index
    %c1_4518 = arith.constant 1 : index
    %c256_4519 = arith.constant 256 : index
    %c2_4520 = arith.constant 2 : index
    %c1_4521 = arith.constant 1 : index
    %c3_4522 = arith.constant 3 : index
    %c1_4523 = arith.constant 1 : index
    // Runtime checks: 256 rem groups == 0 and 1024 rem groups == 0.
    %4263 = arith.index_cast %4258 : i64 to index
    %c0_4524 = arith.constant 0 : index
    %4264 = arith.remsi %c256_4511, %4263 : index
    %4265 = arith.cmpi eq, %c0_4524, %4264 : index
    cf.assert %4265, "invalid: groups must divide input channel size evenly."
    %c0_4525 = arith.constant 0 : index
    %4266 = arith.remsi %c1024_4517, %4263 : index
    %4267 = arith.cmpi eq, %c0_4525, %4266 : index
    cf.assert %4267, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: _4526/_4527
    // multiply (kernel - 1), _4528/_4529 are the floor-division divisors
    // (presumably dilation and stride respectively -- TODO confirm).
    %c1_i64_4526 = arith.constant 1 : i64
    %c1_i64_4527 = arith.constant 1 : i64
    %c1_i64_4528 = arith.constant 1 : i64
    %c1_i64_4529 = arith.constant 1 : i64
    // Padding value yielded by tensor.pad.
    %cst_4530 = arith.constant 0.000000e+00 : f32
    %c0_4531 = arith.constant 0 : index
    %c1_4532 = arith.constant 1 : index
    %c1_4533 = arith.constant 1 : index
    %c256_4534 = arith.constant 256 : index
    %c2_4535 = arith.constant 2 : index
    %c14_4536 = arith.constant 14 : index
    %c3_4537 = arith.constant 3 : index
    %c14_4538 = arith.constant 14 : index
    %c0_i64_4539 = arith.constant 0 : i64
    // Pad amounts: 0 for dims 0 and 1; %4259 / %4260 for dims 2 and 3, applied
    // identically as low and high padding. The result type is fully dynamic.
    %4268 = arith.index_cast %c0_i64_4539 : i64 to index
    %4269 = arith.index_cast %c0_i64_4539 : i64 to index
    %4270 = arith.index_cast %4259 : i64 to index
    %4271 = arith.index_cast %4260 : i64 to index
    %padded_4540 = tensor.pad %cast_4472 low[%4268, %4269, %4270, %4271] high[%4268, %4269, %4270, %4271] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4530 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((14 + 2 * %4259) - %c1_i64_4526 * (%c1_4521 - 1) - 1) floordiv %c1_i64_4528 + 1
    %4272 = arith.index_cast %c1_4521 : index to i64
    %c1_i64_4541 = arith.constant 1 : i64
    %c2_i64_4542 = arith.constant 2 : i64
    %4273 = arith.muli %4259, %c2_i64_4542 : i64
    %4274 = arith.index_cast %c14_4513 : index to i64
    %4275 = arith.addi %4274, %4273 : i64
    %4276 = arith.subi %4272, %c1_i64_4541 : i64
    %4277 = arith.muli %c1_i64_4526, %4276 : i64
    %4278 = arith.subi %4275, %4277 : i64
    %4279 = arith.subi %4278, %c1_i64_4541 : i64
    %4280 = arith.floordivsi %4279, %c1_i64_4528 : i64
    %4281 = arith.addi %4280, %c1_i64_4541 : i64
    %4282 = arith.index_cast %4281 : i64 to index
    // Output extent for dim 3, same formula using %4260, %c1_4523,
    // %c1_i64_4527 and %c1_i64_4529.
    %4283 = arith.index_cast %c1_4523 : index to i64
    %c1_i64_4543 = arith.constant 1 : i64
    %c2_i64_4544 = arith.constant 2 : i64
    %4284 = arith.muli %4260, %c2_i64_4544 : i64
    %4285 = arith.index_cast %c14_4515 : index to i64
    %4286 = arith.addi %4285, %4284 : i64
    %4287 = arith.subi %4283, %c1_i64_4543 : i64
    %4288 = arith.muli %c1_i64_4527, %4287 : i64
    %4289 = arith.subi %4286, %4288 : i64
    %4290 = arith.subi %4289, %c1_i64_4543 : i64
    %4291 = arith.floordivsi %4290, %c1_i64_4529 : i64
    %4292 = arith.addi %4291, %c1_i64_4543 : i64
    %4293 = arith.index_cast %4292 : i64 to index
    // Broadcast the bias along dim 1 (#map3 selects d1) into a tensor of the
    // output shape; that tensor is the `outs` operand of the convolution below.
    %4294 = tensor.empty(%4282, %4293) : tensor<1x1024x?x?xf32>
    %4295 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4498 : tensor<1024xf32>) outs(%4294 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // No use of %4296, %4297 or the two index constants below is visible in
    // this excerpt.
    %4296 = arith.floordivsi %c256_4511, %4263 : index
    %4297 = arith.floordivsi %c1024_4517, %4263 : index
    %c0_4545 = arith.constant 0 : index
    %c1_4546 = arith.constant 1 : index
    // Convolution with unit strides and dilations over the padded input.
    %4298 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4540, %cast_4487 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%4295 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Refine the dynamic spatial dims to the static 14x14.
    %cast_4547 = tensor.cast %4298 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize %cast_4547 (f32, 1x1024x14x14) to i8, element-wise:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4299 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4300 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4548 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4548 = torch.constant.int 12
    %4301 = torch.aten.item %4299 : !torch.vtensor<[],f32> -> !torch.float
    %4302 = torch_c.to_f64 %4301
    %4303 = torch.aten.item %4300 : !torch.vtensor<[],si8> -> !torch.int
    %4304 = torch_c.to_i64 %4303
    // No use of the following index constants is visible in this excerpt.
    %c1_4549 = arith.constant 1 : index
    %c1_4550 = arith.constant 1 : index
    %c1024_4551 = arith.constant 1024 : index
    %c2_4552 = arith.constant 2 : index
    %c14_4553 = arith.constant 14 : index
    %c3_4554 = arith.constant 3 : index
    %c14_4555 = arith.constant 14 : index
    %4305 = tensor.empty() : tensor<1x1024x14x14xi8>
    // #map reads the input with dim 0 pinned to index 0; #map1 is the identity.
    %4306 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4547 : tensor<1x1024x14x14xf32>) outs(%4305 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4303
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4301
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4556 = tensor.cast %4306 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4557 = tensor.cast %cast_4556 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_4557 (i8, 1x1024x14x14) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 3.90625e-03 (2^-8) and zero_point = 0 from the literals below.
    %4307 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4308 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4309 = torch.aten.item %4307 : !torch.vtensor<[],f32> -> !torch.float
    %4310 = torch_c.to_f64 %4309
    %4311 = torch.aten.item %4308 : !torch.vtensor<[],si8> -> !torch.int
    %4312 = torch_c.to_i64 %4311
    // Cast to the identical static type (no type change).
    %cast_4558 = tensor.cast %cast_4557 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4559 = arith.constant 1 : index
    %c1_4560 = arith.constant 1 : index
    %c1024_4561 = arith.constant 1024 : index
    %c2_4562 = arith.constant 2 : index
    %c14_4563 = arith.constant 14 : index
    %c3_4564 = arith.constant 3 : index
    %c14_4565 = arith.constant 14 : index
    %4313 = tensor.empty() : tensor<1x1024x14x14xf32>
    // #map reads the input with dim 0 pinned to index 0; #map1 is the identity.
    %4314 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4558 : tensor<1x1024x14x14xi8>) outs(%4313 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4311
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4309
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4566 = tensor.cast %4314 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Element-wise add of two 1x1024x14x14 f32 tensors:
    //   out = %cast_4566 + %cast_4260 * alpha
    // where alpha = sitofp(%4315) and %4315 comes from the constant int 1 below.
    // %cast_4260 is defined before this excerpt.
    %int1_4567 = torch.constant.int 1
    %4315 = torch_c.to_i64 %int1_4567
    // Index constants; only the 1024 / 14 pairs compared below are used within
    // this excerpt.
    %c1_4568 = arith.constant 1 : index
    %c1_4569 = arith.constant 1 : index
    %c1024_4570 = arith.constant 1024 : index
    %c2_4571 = arith.constant 2 : index
    %c14_4572 = arith.constant 14 : index
    %c3_4573 = arith.constant 3 : index
    %c14_4574 = arith.constant 14 : index
    %c1_4575 = arith.constant 1 : index
    %c1024_4576 = arith.constant 1024 : index
    // Broadcast-compatibility checks; each compares two constants of equal
    // value (1024 vs 1024, 14 vs 14, 14 vs 14).
    %4316 = arith.cmpi eq, %c1024_4570, %c1024_4576 : index
    cf.assert %4316, "mismatched size for broadcast"
    %c2_4577 = arith.constant 2 : index
    %c14_4578 = arith.constant 14 : index
    %4317 = arith.cmpi eq, %c14_4572, %c14_4578 : index
    cf.assert %4317, "mismatched size for broadcast"
    %c3_4579 = arith.constant 3 : index
    %c14_4580 = arith.constant 14 : index
    %4318 = arith.cmpi eq, %c14_4574, %c14_4580 : index
    cf.assert %4318, "mismatched size for broadcast"
    %4319 = tensor.empty() : tensor<1x1024x14x14xf32>
    // Both inputs use #map (dim 0 pinned to index 0); the output uses the identity.
    %4320 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4566, %cast_4260 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4319 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by alpha, then add it to the first.
      %5774 = arith.sitofp %4315 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4581 = tensor.cast %4320 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Element-wise rectification of %cast_4581 (f32, 1x1024x14x14):
    //   out = (in > 0.0) ? in : 0.0
    // No use of the following index constants is visible in this excerpt.
    %c1_4582 = arith.constant 1 : index
    %c1_4583 = arith.constant 1 : index
    %c1024_4584 = arith.constant 1024 : index
    %c2_4585 = arith.constant 2 : index
    %c14_4586 = arith.constant 14 : index
    %c3_4587 = arith.constant 3 : index
    %c14_4588 = arith.constant 14 : index
    %4321 = tensor.empty() : tensor<1x1024x14x14xf32>
    // #map reads the input with dim 0 pinned to index 0; #map1 is the identity.
    %4322 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4581 : tensor<1x1024x14x14xf32>) outs(%4321 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered greater-than predicate, so the compare is also
      // true when %in is NaN and the select then yields %in unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4589 = tensor.cast %4322 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize %cast_4589 (f32, 1x1024x14x14) to i8, element-wise:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4323 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4324 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4590 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4590 = torch.constant.int 12
    %4325 = torch.aten.item %4323 : !torch.vtensor<[],f32> -> !torch.float
    %4326 = torch_c.to_f64 %4325
    %4327 = torch.aten.item %4324 : !torch.vtensor<[],si8> -> !torch.int
    %4328 = torch_c.to_i64 %4327
    // No use of the following index constants is visible in this excerpt.
    %c1_4591 = arith.constant 1 : index
    %c1_4592 = arith.constant 1 : index
    %c1024_4593 = arith.constant 1024 : index
    %c2_4594 = arith.constant 2 : index
    %c14_4595 = arith.constant 14 : index
    %c3_4596 = arith.constant 3 : index
    %c14_4597 = arith.constant 14 : index
    %4329 = tensor.empty() : tensor<1x1024x14x14xi8>
    // #map reads the input with dim 0 pinned to index 0; #map1 is the identity.
    %4330 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4589 : tensor<1x1024x14x14xf32>) outs(%4329 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4327
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4325
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4598 = tensor.cast %4330 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4599 = tensor.cast %cast_4598 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_4599 (i8, 1x1024x14x14) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 1.5625e-02 (2^-6) and zero_point = 0 from the literals below.
    %4331 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4332 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4333 = torch.aten.item %4331 : !torch.vtensor<[],f32> -> !torch.float
    %4334 = torch_c.to_f64 %4333
    %4335 = torch.aten.item %4332 : !torch.vtensor<[],si8> -> !torch.int
    %4336 = torch_c.to_i64 %4335
    // Cast to the identical static type (no type change).
    %cast_4600 = tensor.cast %cast_4599 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4601 = arith.constant 1 : index
    %c1_4602 = arith.constant 1 : index
    %c1024_4603 = arith.constant 1024 : index
    %c2_4604 = arith.constant 2 : index
    %c14_4605 = arith.constant 14 : index
    %c3_4606 = arith.constant 3 : index
    %c14_4607 = arith.constant 14 : index
    %4337 = tensor.empty() : tensor<1x1024x14x14xf32>
    // #map reads the input with dim 0 pinned to index 0; #map1 is the identity.
    %4338 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4600 : tensor<1x1024x14x14xi8>) outs(%4337 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4335
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4333
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4608 = tensor.cast %4338 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize %162 (f32, 256x1024x1x1; defined before this excerpt) to i8:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 0.001953125 (2^-9) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4339 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4340 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4609 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4609 = torch.constant.int 12
    %4341 = torch.aten.item %4339 : !torch.vtensor<[],f32> -> !torch.float
    %4342 = torch_c.to_f64 %4341
    %4343 = torch.aten.item %4340 : !torch.vtensor<[],si8> -> !torch.int
    %4344 = torch_c.to_i64 %4343
    // No use of the following index constants is visible in this excerpt.
    %c1_4610 = arith.constant 1 : index
    %c0_4611 = arith.constant 0 : index
    %c256_4612 = arith.constant 256 : index
    %c1_4613 = arith.constant 1 : index
    %c1024_4614 = arith.constant 1024 : index
    %4345 = tensor.empty() : tensor<256x1024x1x1xi8>
    // #map4 reads the input with dims 2 and 3 pinned to index 0 (both are size 1).
    %4346 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%162 : tensor<256x1024x1x1xf32>) outs(%4345 : tensor<256x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4343
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4341
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x1024x1x1xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4615 = tensor.cast %4346 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    %cast_4616 = tensor.cast %cast_4615 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // Dequantize %cast_4616 (i8, 256x1024x1x1) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 0.001953125 (2^-9) and zero_point = 0 from the literals below.
    %4347 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4348 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4349 = torch.aten.item %4347 : !torch.vtensor<[],f32> -> !torch.float
    %4350 = torch_c.to_f64 %4349
    %4351 = torch.aten.item %4348 : !torch.vtensor<[],si8> -> !torch.int
    %4352 = torch_c.to_i64 %4351
    // Cast to the identical static type (no type change).
    %cast_4617 = tensor.cast %cast_4616 : tensor<256x1024x1x1xi8> to tensor<256x1024x1x1xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4618 = arith.constant 1 : index
    %c0_4619 = arith.constant 0 : index
    %c256_4620 = arith.constant 256 : index
    %c1_4621 = arith.constant 1 : index
    %c1024_4622 = arith.constant 1024 : index
    %4353 = tensor.empty() : tensor<256x1024x1x1xf32>
    // #map4 reads the input with dims 2 and 3 pinned to index 0 (both are size 1).
    %4354 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4617 : tensor<256x1024x1x1xi8>) outs(%4353 : tensor<256x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4351
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4349
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x1024x1x1xf32>
    %cast_4623 = tensor.cast %4354 : tensor<256x1024x1x1xf32> to tensor<256x1024x1x1xf32>
    // Quantize %164 (f32, 256 elements; defined before this excerpt) to i8:
    //   out = clamp(round(in / scale) + zero_point, -128, 127)
    // with scale = 0.001953125 (2^-9) and zero_point = 0 from the literals below.
    // NOTE(review): math.round rounds ties away from zero, whereas ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm this is intended.
    %4355 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4356 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // No use of %int12_4624 is visible in this excerpt; presumably a dtype code
    // left over from the torch-level quantize op -- TODO confirm.
    %int12_4624 = torch.constant.int 12
    %4357 = torch.aten.item %4355 : !torch.vtensor<[],f32> -> !torch.float
    %4358 = torch_c.to_f64 %4357
    %4359 = torch.aten.item %4356 : !torch.vtensor<[],si8> -> !torch.int
    %4360 = torch_c.to_i64 %4359
    // No use of the following index constants is visible in this excerpt.
    %c1_4625 = arith.constant 1 : index
    %c0_4626 = arith.constant 0 : index
    %c256_4627 = arith.constant 256 : index
    %4361 = tensor.empty() : tensor<256xi8>
    // Rank-1 identity maps (#map2) on both input and output.
    %4362 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%164 : tensor<256xf32>) outs(%4361 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point and scale are converted from torch scalars inside the payload.
      %5774 = torch_c.to_i64 %4359
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4357
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point, computed in f32.
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the float-to-signed-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Both casts are to the identical static type (no type change).
    %cast_4628 = tensor.cast %4362 : tensor<256xi8> to tensor<256xi8>
    %cast_4629 = tensor.cast %cast_4628 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_4629 (i8, 256 elements) back to f32, element-wise:
    //   out = (sext(in) - zero_point) * scale
    // with scale = 0.001953125 (2^-9) and zero_point = 0 from the literals below.
    %4363 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4364 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4365 = torch.aten.item %4363 : !torch.vtensor<[],f32> -> !torch.float
    %4366 = torch_c.to_f64 %4365
    %4367 = torch.aten.item %4364 : !torch.vtensor<[],si8> -> !torch.int
    %4368 = torch_c.to_i64 %4367
    // Cast to the identical static type (no type change).
    %cast_4630 = tensor.cast %cast_4629 : tensor<256xi8> to tensor<256xi8>
    // No use of the following index constants is visible in this excerpt.
    %c1_4631 = arith.constant 1 : index
    %c0_4632 = arith.constant 0 : index
    %c256_4633 = arith.constant 256 : index
    %4369 = tensor.empty() : tensor<256xf32>
    // Rank-1 identity maps (#map2) on both input and output.
    %4370 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4630 : tensor<256xi8>) outs(%4369 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so the zero-point subtraction cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4367
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Scale is converted from the torch float and narrowed to f32.
      %5779 = torch_c.to_f64 %4365
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    %cast_4634 = tensor.cast %4370 : tensor<256xf32> to tensor<256xf32>
    // 2-D convolution (NCHW input, FCHW filter):
    //   input  %cast_4608 : 1x1024x14x14
    //   filter %cast_4623 : 256x1024x1x1
    //   bias   %cast_4634 : 256
    // The result is cast to 1x256x14x14 (%cast_4683).
    %int0_4635 = torch.constant.int 0
    %int0_4636 = torch.constant.int 0
    %int1_4637 = torch.constant.int 1
    %int1_4638 = torch.constant.int 1
    %int1_4639 = torch.constant.int 1
    %int1_4640 = torch.constant.int 1
    %int0_4641 = torch.constant.int 0
    // Presumably the padding / stride / dilation / output-padding lists of the
    // torch-level convolution; no consumer is visible in this excerpt -- TODO confirm.
    %4371 = torch.prim.ListConstruct %int0_4635, %int0_4636 : (!torch.int, !torch.int) -> !torch.list<int>
    %4372 = torch.prim.ListConstruct %int1_4637, %int1_4638 : (!torch.int, !torch.int) -> !torch.list<int>
    %4373 = torch.prim.ListConstruct %int1_4639, %int1_4640 : (!torch.int, !torch.int) -> !torch.list<int>
    %4374 = torch.prim.ListConstruct %int0_4641, %int0_4641 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4642 = torch.constant.bool false
    %int1_4643 = torch.constant.int 1
    // %4375 (constant 1) is the "groups" value checked by the asserts below;
    // %4376 / %4377 (constant 0) become the spatial padding amounts.
    %4375 = torch_c.to_i64 %int1_4643
    %4376 = torch_c.to_i64 %int0_4635
    %4377 = torch_c.to_i64 %int0_4636
    %4378 = torch_c.to_i64 %int0_4641
    %4379 = torch_c.to_i64 %int0_4641
    // Index constants; only %c1024_4647, %c14_4649, %c14_4651, %c256_4653,
    // %c1_4657 and %c1_4659 are used within this excerpt.
    %c0_4644 = arith.constant 0 : index
    %c1_4645 = arith.constant 1 : index
    %c1_4646 = arith.constant 1 : index
    %c1024_4647 = arith.constant 1024 : index
    %c2_4648 = arith.constant 2 : index
    %c14_4649 = arith.constant 14 : index
    %c3_4650 = arith.constant 3 : index
    %c14_4651 = arith.constant 14 : index
    %c0_4652 = arith.constant 0 : index
    %c256_4653 = arith.constant 256 : index
    %c1_4654 = arith.constant 1 : index
    %c1024_4655 = arith.constant 1024 : index
    %c2_4656 = arith.constant 2 : index
    %c1_4657 = arith.constant 1 : index
    %c3_4658 = arith.constant 3 : index
    %c1_4659 = arith.constant 1 : index
    // Runtime checks: 1024 rem groups == 0 and 256 rem groups == 0.
    %4380 = arith.index_cast %4375 : i64 to index
    %c0_4660 = arith.constant 0 : index
    %4381 = arith.remsi %c1024_4647, %4380 : index
    %4382 = arith.cmpi eq, %c0_4660, %4381 : index
    cf.assert %4382, "invalid: groups must divide input channel size evenly."
    %c0_4661 = arith.constant 0 : index
    %4383 = arith.remsi %c256_4653, %4380 : index
    %4384 = arith.cmpi eq, %c0_4661, %4383 : index
    cf.assert %4384, "invalid: groups must divide weight batch size evenly."
    // i64 constants used in the output-size arithmetic below: _4662/_4663
    // multiply (kernel - 1), _4664/_4665 are the floor-division divisors
    // (presumably dilation and stride respectively -- TODO confirm).
    %c1_i64_4662 = arith.constant 1 : i64
    %c1_i64_4663 = arith.constant 1 : i64
    %c1_i64_4664 = arith.constant 1 : i64
    %c1_i64_4665 = arith.constant 1 : i64
    // Padding value yielded by tensor.pad.
    %cst_4666 = arith.constant 0.000000e+00 : f32
    %c0_4667 = arith.constant 0 : index
    %c1_4668 = arith.constant 1 : index
    %c1_4669 = arith.constant 1 : index
    %c1024_4670 = arith.constant 1024 : index
    %c2_4671 = arith.constant 2 : index
    %c14_4672 = arith.constant 14 : index
    %c3_4673 = arith.constant 3 : index
    %c14_4674 = arith.constant 14 : index
    %c0_i64_4675 = arith.constant 0 : i64
    // Pad amounts: 0 for dims 0 and 1; %4376 / %4377 for dims 2 and 3, applied
    // identically as low and high padding. The result type is fully dynamic.
    %4385 = arith.index_cast %c0_i64_4675 : i64 to index
    %4386 = arith.index_cast %c0_i64_4675 : i64 to index
    %4387 = arith.index_cast %4376 : i64 to index
    %4388 = arith.index_cast %4377 : i64 to index
    %padded_4676 = tensor.pad %cast_4608 low[%4385, %4386, %4387, %4388] high[%4385, %4386, %4387, %4388] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4666 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   ((14 + 2 * %4376) - %c1_i64_4662 * (%c1_4657 - 1) - 1) floordiv %c1_i64_4664 + 1
    %4389 = arith.index_cast %c1_4657 : index to i64
    %c1_i64_4677 = arith.constant 1 : i64
    %c2_i64_4678 = arith.constant 2 : i64
    %4390 = arith.muli %4376, %c2_i64_4678 : i64
    %4391 = arith.index_cast %c14_4649 : index to i64
    %4392 = arith.addi %4391, %4390 : i64
    %4393 = arith.subi %4389, %c1_i64_4677 : i64
    %4394 = arith.muli %c1_i64_4662, %4393 : i64
    %4395 = arith.subi %4392, %4394 : i64
    %4396 = arith.subi %4395, %c1_i64_4677 : i64
    %4397 = arith.floordivsi %4396, %c1_i64_4664 : i64
    %4398 = arith.addi %4397, %c1_i64_4677 : i64
    %4399 = arith.index_cast %4398 : i64 to index
    // Output extent for dim 3, same formula using %4377, %c1_4659,
    // %c1_i64_4663 and %c1_i64_4665.
    %4400 = arith.index_cast %c1_4659 : index to i64
    %c1_i64_4679 = arith.constant 1 : i64
    %c2_i64_4680 = arith.constant 2 : i64
    %4401 = arith.muli %4377, %c2_i64_4680 : i64
    %4402 = arith.index_cast %c14_4651 : index to i64
    %4403 = arith.addi %4402, %4401 : i64
    %4404 = arith.subi %4400, %c1_i64_4679 : i64
    %4405 = arith.muli %c1_i64_4663, %4404 : i64
    %4406 = arith.subi %4403, %4405 : i64
    %4407 = arith.subi %4406, %c1_i64_4679 : i64
    %4408 = arith.floordivsi %4407, %c1_i64_4665 : i64
    %4409 = arith.addi %4408, %c1_i64_4679 : i64
    %4410 = arith.index_cast %4409 : i64 to index
    // Broadcast the bias along dim 1 (#map3 selects d1) into a tensor of the
    // output shape; that tensor is the `outs` operand of the convolution below.
    %4411 = tensor.empty(%4399, %4410) : tensor<1x256x?x?xf32>
    %4412 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4634 : tensor<256xf32>) outs(%4411 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // No use of %4413, %4414 or the two index constants below is visible in
    // this excerpt.
    %4413 = arith.floordivsi %c1024_4647, %4380 : index
    %4414 = arith.floordivsi %c256_4653, %4380 : index
    %c0_4681 = arith.constant 0 : index
    %c1_4682 = arith.constant 1 : index
    // Convolution with unit strides and dilations over the padded input.
    %4415 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4676, %cast_4623 : tensor<?x?x?x?xf32>, tensor<256x1024x1x1xf32>) outs(%4412 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Refine the dynamic spatial dims to the static 14x14.
    %cast_4683 = tensor.cast %4415 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // Elementwise clamp-at-zero of %cast_4683: yields the input where it compares
    // greater than 0.0, otherwise 0.0.
    // The index constants below are not referenced within this excerpt.
    %c1_4684 = arith.constant 1 : index
    %c1_4685 = arith.constant 1 : index
    %c256_4686 = arith.constant 256 : index
    %c2_4687 = arith.constant 2 : index
    %c14_4688 = arith.constant 14 : index
    %c3_4689 = arith.constant 3 : index
    %c14_4690 = arith.constant 14 : index
    %4416 = tensor.empty() : tensor<1x256x14x14xf32>
    // The input is read through #map, which fixes its dim-0 index at 0.
    %4417 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4683 : tensor<1x256x14x14xf32>) outs(%4416 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered greater-than predicate, so it is also true when
      // %in is NaN; in that case %in is yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Cast between identical types; the tensor type is unchanged.
    %cast_4691 = tensor.cast %4417 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %cast_4691 from f32 to i8 using scale 7.8125e-03 (%4418) and
    // zero point 0 (%4419).
    %4418 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4419 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4692 is not referenced within this excerpt.
    %int12_4692 = torch.constant.int 12
    // Extract the scalar scale and zero point from their rank-0 tensors.
    %4420 = torch.aten.item %4418 : !torch.vtensor<[],f32> -> !torch.float
    %4421 = torch_c.to_f64 %4420
    %4422 = torch.aten.item %4419 : !torch.vtensor<[],si8> -> !torch.int
    %4423 = torch_c.to_i64 %4422
    %c1_4693 = arith.constant 1 : index
    %c1_4694 = arith.constant 1 : index
    %c256_4695 = arith.constant 256 : index
    %c2_4696 = arith.constant 2 : index
    %c14_4697 = arith.constant 14 : index
    %c3_4698 = arith.constant 3 : index
    %c14_4699 = arith.constant 14 : index
    %4424 = tensor.empty() : tensor<1x256x14x14xi8>
    %4425 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4691 : tensor<1x256x14x14xf32>) outs(%4424 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4422
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4420
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4700 = tensor.cast %4425 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4701 = tensor.cast %cast_4700 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_4701 from i8 back to f32 using scale 7.8125e-03 (%4426)
    // and zero point 0 (%4427).
    %4426 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4427 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4428 = torch.aten.item %4426 : !torch.vtensor<[],f32> -> !torch.float
    %4429 = torch_c.to_f64 %4428
    %4430 = torch.aten.item %4427 : !torch.vtensor<[],si8> -> !torch.int
    %4431 = torch_c.to_i64 %4430
    // Cast between identical types; the tensor type is unchanged.
    %cast_4702 = tensor.cast %cast_4701 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %c1_4703 = arith.constant 1 : index
    %c1_4704 = arith.constant 1 : index
    %c256_4705 = arith.constant 256 : index
    %c2_4706 = arith.constant 2 : index
    %c14_4707 = arith.constant 14 : index
    %c3_4708 = arith.constant 3 : index
    %c14_4709 = arith.constant 14 : index
    %4432 = tensor.empty() : tensor<1x256x14x14xf32>
    %4433 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4702 : tensor<1x256x14x14xi8>) outs(%4432 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4430
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4428
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    // %cast_4710 is the input that gets padded for the convolution producing %4510.
    %cast_4710 = tensor.cast %4433 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 256x256x3x3 f32 tensor %166 to i8 using scale 3.90625e-03
    // (%4434) and zero point 0 (%4435).
    // NOTE(review): %166 is defined earlier in the function, outside this excerpt.
    %4434 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4435 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4711 is not referenced within this excerpt.
    %int12_4711 = torch.constant.int 12
    %4436 = torch.aten.item %4434 : !torch.vtensor<[],f32> -> !torch.float
    %4437 = torch_c.to_f64 %4436
    %4438 = torch.aten.item %4435 : !torch.vtensor<[],si8> -> !torch.int
    %4439 = torch_c.to_i64 %4438
    %c1_4712 = arith.constant 1 : index
    %c0_4713 = arith.constant 0 : index
    %c256_4714 = arith.constant 256 : index
    %c1_4715 = arith.constant 1 : index
    %c256_4716 = arith.constant 256 : index
    %c2_4717 = arith.constant 2 : index
    %c3_4718 = arith.constant 3 : index
    %c3_4719 = arith.constant 3 : index
    %c3_4720 = arith.constant 3 : index
    %4440 = tensor.empty() : tensor<256x256x3x3xi8>
    %4441 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%166 : tensor<256x256x3x3xf32>) outs(%4440 : tensor<256x256x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4438
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4436
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256x256x3x3xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4721 = tensor.cast %4441 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %cast_4722 = tensor.cast %cast_4721 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    // Dequantize %cast_4722 from i8 back to f32 using scale 3.90625e-03 (%4442)
    // and zero point 0 (%4443).
    %4442 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4443 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4444 = torch.aten.item %4442 : !torch.vtensor<[],f32> -> !torch.float
    %4445 = torch_c.to_f64 %4444
    %4446 = torch.aten.item %4443 : !torch.vtensor<[],si8> -> !torch.int
    %4447 = torch_c.to_i64 %4446
    // Cast between identical types; the tensor type is unchanged.
    %cast_4723 = tensor.cast %cast_4722 : tensor<256x256x3x3xi8> to tensor<256x256x3x3xi8>
    %c1_4724 = arith.constant 1 : index
    %c0_4725 = arith.constant 0 : index
    %c256_4726 = arith.constant 256 : index
    %c1_4727 = arith.constant 1 : index
    %c256_4728 = arith.constant 256 : index
    %c2_4729 = arith.constant 2 : index
    %c3_4730 = arith.constant 3 : index
    %c3_4731 = arith.constant 3 : index
    %c3_4732 = arith.constant 3 : index
    %4448 = tensor.empty() : tensor<256x256x3x3xf32>
    %4449 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4723 : tensor<256x256x3x3xi8>) outs(%4448 : tensor<256x256x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4446
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4444
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256x256x3x3xf32>
    // %cast_4733 is the filter operand of the convolution producing %4510.
    %cast_4733 = tensor.cast %4449 : tensor<256x256x3x3xf32> to tensor<256x256x3x3xf32>
    // Quantize the 256-element f32 tensor %168 to i8 using scale 7.8125e-03
    // (%4450) and zero point 0 (%4451).
    // NOTE(review): %168 is defined earlier in the function, outside this excerpt.
    %4450 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4451 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4734 is not referenced within this excerpt.
    %int12_4734 = torch.constant.int 12
    %4452 = torch.aten.item %4450 : !torch.vtensor<[],f32> -> !torch.float
    %4453 = torch_c.to_f64 %4452
    %4454 = torch.aten.item %4451 : !torch.vtensor<[],si8> -> !torch.int
    %4455 = torch_c.to_i64 %4454
    %c1_4735 = arith.constant 1 : index
    %c0_4736 = arith.constant 0 : index
    %c256_4737 = arith.constant 256 : index
    %4456 = tensor.empty() : tensor<256xi8>
    %4457 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%168 : tensor<256xf32>) outs(%4456 : tensor<256xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4454
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4452
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<256xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4738 = tensor.cast %4457 : tensor<256xi8> to tensor<256xi8>
    %cast_4739 = tensor.cast %cast_4738 : tensor<256xi8> to tensor<256xi8>
    // Dequantize %cast_4739 from i8 back to f32 using scale 7.8125e-03 (%4458)
    // and zero point 0 (%4459).
    %4458 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4459 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4460 = torch.aten.item %4458 : !torch.vtensor<[],f32> -> !torch.float
    %4461 = torch_c.to_f64 %4460
    %4462 = torch.aten.item %4459 : !torch.vtensor<[],si8> -> !torch.int
    %4463 = torch_c.to_i64 %4462
    // Cast between identical types; the tensor type is unchanged.
    %cast_4740 = tensor.cast %cast_4739 : tensor<256xi8> to tensor<256xi8>
    %c1_4741 = arith.constant 1 : index
    %c0_4742 = arith.constant 0 : index
    %c256_4743 = arith.constant 256 : index
    %4464 = tensor.empty() : tensor<256xf32>
    %4465 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4740 : tensor<256xi8>) outs(%4464 : tensor<256xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4462
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4460
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<256xf32>
    // %cast_4744 is the per-channel tensor used to pre-fill the output of the
    // convolution producing %4510.
    %cast_4744 = tensor.cast %4465 : tensor<256xf32> to tensor<256xf32>
    // Convolution of %cast_4710 (1x256x14x14) with the 256x256x3x3 filter
    // %cast_4733, output pre-filled from %cast_4744. Spatial padding is 1 on each
    // side, strides and dilations are 1, and the result is cast to 1x256x14x14.
    %int1_4745 = torch.constant.int 1
    %int1_4746 = torch.constant.int 1
    %int1_4747 = torch.constant.int 1
    %int1_4748 = torch.constant.int 1
    %int1_4749 = torch.constant.int 1
    %int1_4750 = torch.constant.int 1
    %int0_4751 = torch.constant.int 0
    // %4466 holds the two values later used as pad amounts (%4471, %4472).
    // The lists %4466-%4469 themselves are not referenced within this excerpt.
    %4466 = torch.prim.ListConstruct %int1_4745, %int1_4746 : (!torch.int, !torch.int) -> !torch.list<int>
    %4467 = torch.prim.ListConstruct %int1_4747, %int1_4748 : (!torch.int, !torch.int) -> !torch.list<int>
    %4468 = torch.prim.ListConstruct %int1_4749, %int1_4750 : (!torch.int, !torch.int) -> !torch.list<int>
    %4469 = torch.prim.ListConstruct %int0_4751, %int0_4751 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4752 = torch.constant.bool false
    %int1_4753 = torch.constant.int 1
    // %4470 (= 1) is the "groups" value checked by the asserts below;
    // %4471 / %4472 (= 1) are the pad amounts for dims 2 and 3.
    %4470 = torch_c.to_i64 %int1_4753
    %4471 = torch_c.to_i64 %int1_4745
    %4472 = torch_c.to_i64 %int1_4746
    %4473 = torch_c.to_i64 %int0_4751
    %4474 = torch_c.to_i64 %int0_4751
    %c0_4754 = arith.constant 0 : index
    %c1_4755 = arith.constant 1 : index
    %c1_4756 = arith.constant 1 : index
    %c256_4757 = arith.constant 256 : index
    %c2_4758 = arith.constant 2 : index
    %c14_4759 = arith.constant 14 : index
    %c3_4760 = arith.constant 3 : index
    %c14_4761 = arith.constant 14 : index
    %c0_4762 = arith.constant 0 : index
    %c256_4763 = arith.constant 256 : index
    %c1_4764 = arith.constant 1 : index
    %c256_4765 = arith.constant 256 : index
    %c2_4766 = arith.constant 2 : index
    %c3_4767 = arith.constant 3 : index
    %c3_4768 = arith.constant 3 : index
    %c3_4769 = arith.constant 3 : index
    // Runtime checks that groups divides 256 (%c256_4757 and %c256_4763) with
    // zero remainder.
    %4475 = arith.index_cast %4470 : i64 to index
    %c0_4770 = arith.constant 0 : index
    %4476 = arith.remsi %c256_4757, %4475 : index
    %4477 = arith.cmpi eq, %c0_4770, %4476 : index
    cf.assert %4477, "invalid: groups must divide input channel size evenly."
    %c0_4771 = arith.constant 0 : index
    %4478 = arith.remsi %c256_4763, %4475 : index
    %4479 = arith.cmpi eq, %c0_4771, %4478 : index
    cf.assert %4479, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_4772/_4773 multiply (kernel - 1) and %c1_i64_4774/_4775 are the
    // divisors in the output-size arithmetic below.
    %c1_i64_4772 = arith.constant 1 : i64
    %c1_i64_4773 = arith.constant 1 : i64
    %c1_i64_4774 = arith.constant 1 : i64
    %c1_i64_4775 = arith.constant 1 : i64
    // Fill value for the padding region.
    %cst_4776 = arith.constant 0.000000e+00 : f32
    %c0_4777 = arith.constant 0 : index
    %c1_4778 = arith.constant 1 : index
    %c1_4779 = arith.constant 1 : index
    %c256_4780 = arith.constant 256 : index
    %c2_4781 = arith.constant 2 : index
    %c14_4782 = arith.constant 14 : index
    %c3_4783 = arith.constant 3 : index
    %c14_4784 = arith.constant 14 : index
    %c0_i64_4785 = arith.constant 0 : i64
    // Pad amounts per dimension: 0 for dims 0 and 1, 1 for dims 2 and 3.
    %4480 = arith.index_cast %c0_i64_4785 : i64 to index
    %4481 = arith.index_cast %c0_i64_4785 : i64 to index
    %4482 = arith.index_cast %4471 : i64 to index
    %4483 = arith.index_cast %4472 : i64 to index
    // Pad %cast_4710 with 0.0, using the same amounts on the low and high sides.
    %padded_4786 = tensor.pad %cast_4710 low[%4480, %4481, %4482, %4483] high[%4480, %4481, %4482, %4483] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4776 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(14 + 2 * 1 - 1 * (3 - 1) - 1, 1) + 1 = 14
    %4484 = arith.index_cast %c3_4767 : index to i64
    %c1_i64_4787 = arith.constant 1 : i64
    %c2_i64_4788 = arith.constant 2 : i64
    %4485 = arith.muli %4471, %c2_i64_4788 : i64
    %4486 = arith.index_cast %c14_4759 : index to i64
    %4487 = arith.addi %4486, %4485 : i64
    %4488 = arith.subi %4484, %c1_i64_4787 : i64
    %4489 = arith.muli %c1_i64_4772, %4488 : i64
    %4490 = arith.subi %4487, %4489 : i64
    %4491 = arith.subi %4490, %c1_i64_4787 : i64
    %4492 = arith.floordivsi %4491, %c1_i64_4774 : i64
    %4493 = arith.addi %4492, %c1_i64_4787 : i64
    %4494 = arith.index_cast %4493 : i64 to index
    // Output extent for dim 3, same formula and same constant values.
    %4495 = arith.index_cast %c3_4769 : index to i64
    %c1_i64_4789 = arith.constant 1 : i64
    %c2_i64_4790 = arith.constant 2 : i64
    %4496 = arith.muli %4472, %c2_i64_4790 : i64
    %4497 = arith.index_cast %c14_4761 : index to i64
    %4498 = arith.addi %4497, %4496 : i64
    %4499 = arith.subi %4495, %c1_i64_4789 : i64
    %4500 = arith.muli %c1_i64_4773, %4499 : i64
    %4501 = arith.subi %4498, %4500 : i64
    %4502 = arith.subi %4501, %c1_i64_4789 : i64
    %4503 = arith.floordivsi %4502, %c1_i64_4775 : i64
    %4504 = arith.addi %4503, %c1_i64_4789 : i64
    %4505 = arith.index_cast %4504 : i64 to index
    // Output buffer with the two computed dynamic extents.
    %4506 = tensor.empty(%4494, %4505) : tensor<1x256x?x?xf32>
    // Fill the output with %cast_4744 indexed by dim 1 only (#map3), i.e. one
    // value per channel copied to every (n, h, w) position.
    %4507 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4744 : tensor<256xf32>) outs(%4506 : tensor<1x256x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x256x?x?xf32>
    // %4508 / %4509 are not referenced again within this excerpt.
    %4508 = arith.floordivsi %c256_4757, %4475 : index
    %4509 = arith.floordivsi %c256_4763, %4475 : index
    %c0_4791 = arith.constant 0 : index
    %c1_4792 = arith.constant 1 : index
    // Convolution accumulating into the pre-filled tensor %4507.
    %4510 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4786, %cast_4733 : tensor<?x?x?x?xf32>, tensor<256x256x3x3xf32>) outs(%4507 : tensor<1x256x?x?xf32>) -> tensor<1x256x?x?xf32>
    // Cast the dynamically shaped result to the static type 1x256x14x14.
    %cast_4793 = tensor.cast %4510 : tensor<1x256x?x?xf32> to tensor<1x256x14x14xf32>
    // Elementwise clamp-at-zero of %cast_4793: yields the input where it compares
    // greater than 0.0, otherwise 0.0.
    // The index constants below are not referenced within this excerpt.
    %c1_4794 = arith.constant 1 : index
    %c1_4795 = arith.constant 1 : index
    %c256_4796 = arith.constant 256 : index
    %c2_4797 = arith.constant 2 : index
    %c14_4798 = arith.constant 14 : index
    %c3_4799 = arith.constant 3 : index
    %c14_4800 = arith.constant 14 : index
    %4511 = tensor.empty() : tensor<1x256x14x14xf32>
    // The input is read through #map, which fixes its dim-0 index at 0.
    %4512 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4793 : tensor<1x256x14x14xf32>) outs(%4511 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered greater-than predicate, so it is also true when
      // %in is NaN; in that case %in is yielded unchanged.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x256x14x14xf32>
    // Cast between identical types; the tensor type is unchanged.
    %cast_4801 = tensor.cast %4512 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize %cast_4801 from f32 to i8 using scale 1.5625e-02 (%4513) and
    // zero point 0 (%4514).
    %4513 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4514 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4802 is not referenced within this excerpt.
    %int12_4802 = torch.constant.int 12
    // Extract the scalar scale and zero point from their rank-0 tensors.
    %4515 = torch.aten.item %4513 : !torch.vtensor<[],f32> -> !torch.float
    %4516 = torch_c.to_f64 %4515
    %4517 = torch.aten.item %4514 : !torch.vtensor<[],si8> -> !torch.int
    %4518 = torch_c.to_i64 %4517
    %c1_4803 = arith.constant 1 : index
    %c1_4804 = arith.constant 1 : index
    %c256_4805 = arith.constant 256 : index
    %c2_4806 = arith.constant 2 : index
    %c14_4807 = arith.constant 14 : index
    %c3_4808 = arith.constant 3 : index
    %c14_4809 = arith.constant 14 : index
    %4519 = tensor.empty() : tensor<1x256x14x14xi8>
    %4520 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4801 : tensor<1x256x14x14xf32>) outs(%4519 : tensor<1x256x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4517
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4515
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x256x14x14xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4810 = tensor.cast %4520 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %cast_4811 = tensor.cast %cast_4810 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    // Dequantize %cast_4811 from i8 back to f32 using scale 1.5625e-02 (%4521)
    // and zero point 0 (%4522).
    %4521 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4522 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4523 = torch.aten.item %4521 : !torch.vtensor<[],f32> -> !torch.float
    %4524 = torch_c.to_f64 %4523
    %4525 = torch.aten.item %4522 : !torch.vtensor<[],si8> -> !torch.int
    %4526 = torch_c.to_i64 %4525
    // Cast between identical types; the tensor type is unchanged.
    %cast_4812 = tensor.cast %cast_4811 : tensor<1x256x14x14xi8> to tensor<1x256x14x14xi8>
    %c1_4813 = arith.constant 1 : index
    %c1_4814 = arith.constant 1 : index
    %c256_4815 = arith.constant 256 : index
    %c2_4816 = arith.constant 2 : index
    %c14_4817 = arith.constant 14 : index
    %c3_4818 = arith.constant 3 : index
    %c14_4819 = arith.constant 14 : index
    %4527 = tensor.empty() : tensor<1x256x14x14xf32>
    %4528 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4812 : tensor<1x256x14x14xi8>) outs(%4527 : tensor<1x256x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4525
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4523
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x256x14x14xf32>
    // %cast_4820 is the input that gets padded for the convolution producing %4605.
    %cast_4820 = tensor.cast %4528 : tensor<1x256x14x14xf32> to tensor<1x256x14x14xf32>
    // Quantize the 1024x256x1x1 f32 tensor %170 to i8 using scale 3.90625e-03
    // (%4529) and zero point 0 (%4530).
    // NOTE(review): %170 is defined earlier in the function, outside this excerpt.
    %4529 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4530 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4821 is not referenced within this excerpt.
    %int12_4821 = torch.constant.int 12
    %4531 = torch.aten.item %4529 : !torch.vtensor<[],f32> -> !torch.float
    %4532 = torch_c.to_f64 %4531
    %4533 = torch.aten.item %4530 : !torch.vtensor<[],si8> -> !torch.int
    %4534 = torch_c.to_i64 %4533
    %c1_4822 = arith.constant 1 : index
    %c0_4823 = arith.constant 0 : index
    %c1024_4824 = arith.constant 1024 : index
    %c1_4825 = arith.constant 1 : index
    %c256_4826 = arith.constant 256 : index
    %4535 = tensor.empty() : tensor<1024x256x1x1xi8>
    // The input is read through #map4, which fixes the indices of its two
    // trailing (size-1) dimensions at 0.
    %4536 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%170 : tensor<1024x256x1x1xf32>) outs(%4535 : tensor<1024x256x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4533
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4531
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024x256x1x1xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4827 = tensor.cast %4536 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %cast_4828 = tensor.cast %cast_4827 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    // Dequantize %cast_4828 from i8 back to f32 using scale 3.90625e-03 (%4537)
    // and zero point 0 (%4538).
    %4537 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4538 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4539 = torch.aten.item %4537 : !torch.vtensor<[],f32> -> !torch.float
    %4540 = torch_c.to_f64 %4539
    %4541 = torch.aten.item %4538 : !torch.vtensor<[],si8> -> !torch.int
    %4542 = torch_c.to_i64 %4541
    // Cast between identical types; the tensor type is unchanged.
    %cast_4829 = tensor.cast %cast_4828 : tensor<1024x256x1x1xi8> to tensor<1024x256x1x1xi8>
    %c1_4830 = arith.constant 1 : index
    %c0_4831 = arith.constant 0 : index
    %c1024_4832 = arith.constant 1024 : index
    %c1_4833 = arith.constant 1 : index
    %c256_4834 = arith.constant 256 : index
    %4543 = tensor.empty() : tensor<1024x256x1x1xf32>
    // The input is read through #map4, which fixes the indices of its two
    // trailing (size-1) dimensions at 0.
    %4544 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4829 : tensor<1024x256x1x1xi8>) outs(%4543 : tensor<1024x256x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4541
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4539
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024x256x1x1xf32>
    // %cast_4835 is the filter operand of the convolution producing %4605.
    %cast_4835 = tensor.cast %4544 : tensor<1024x256x1x1xf32> to tensor<1024x256x1x1xf32>
    // Quantize the 1024-element f32 tensor %172 to i8 using scale 3.90625e-03
    // (%4545) and zero point 0 (%4546).
    // NOTE(review): %172 is defined earlier in the function, outside this excerpt.
    %4545 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4546 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4836 is not referenced within this excerpt.
    %int12_4836 = torch.constant.int 12
    %4547 = torch.aten.item %4545 : !torch.vtensor<[],f32> -> !torch.float
    %4548 = torch_c.to_f64 %4547
    %4549 = torch.aten.item %4546 : !torch.vtensor<[],si8> -> !torch.int
    %4550 = torch_c.to_i64 %4549
    %c1_4837 = arith.constant 1 : index
    %c0_4838 = arith.constant 0 : index
    %c1024_4839 = arith.constant 1024 : index
    %4551 = tensor.empty() : tensor<1024xi8>
    %4552 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%172 : tensor<1024xf32>) outs(%4551 : tensor<1024xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32.
      %5774 = torch_c.to_i64 %4549
      %5775 = arith.sitofp %5774 : i64 to f32
      // Scale as f32.
      %5776 = torch_c.to_f64 %4547
      %5777 = arith.truncf %5776 : f64 to f32
      // round(in / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before converting to i8.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1024xi8>
    // Casts between identical types; the tensor type is unchanged.
    %cast_4840 = tensor.cast %4552 : tensor<1024xi8> to tensor<1024xi8>
    %cast_4841 = tensor.cast %cast_4840 : tensor<1024xi8> to tensor<1024xi8>
    // Dequantize %cast_4841 from i8 back to f32 using scale 3.90625e-03 (%4553)
    // and zero point 0 (%4554).
    %4553 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4554 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4555 = torch.aten.item %4553 : !torch.vtensor<[],f32> -> !torch.float
    %4556 = torch_c.to_f64 %4555
    %4557 = torch.aten.item %4554 : !torch.vtensor<[],si8> -> !torch.int
    %4558 = torch_c.to_i64 %4557
    // Cast between identical types; the tensor type is unchanged.
    %cast_4842 = tensor.cast %cast_4841 : tensor<1024xi8> to tensor<1024xi8>
    %c1_4843 = arith.constant 1 : index
    %c0_4844 = arith.constant 0 : index
    %c1024_4845 = arith.constant 1024 : index
    %4559 = tensor.empty() : tensor<1024xf32>
    %4560 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4842 : tensor<1024xi8>) outs(%4559 : tensor<1024xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend the i8 value and subtract the zero point in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4557
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      // (in - zero_point) * scale, computed in f32.
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4555
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1024xf32>
    // %cast_4846 is the per-channel tensor used to pre-fill the output of the
    // convolution producing %4605.
    %cast_4846 = tensor.cast %4560 : tensor<1024xf32> to tensor<1024xf32>
    // Convolution of %cast_4820 (1x256x14x14) with the 1024x256x1x1 filter
    // %cast_4835, output pre-filled from %cast_4846. Spatial padding is 0,
    // strides and dilations are 1, and the result is cast to 1x1024x14x14.
    %int0_4847 = torch.constant.int 0
    %int0_4848 = torch.constant.int 0
    %int1_4849 = torch.constant.int 1
    %int1_4850 = torch.constant.int 1
    %int1_4851 = torch.constant.int 1
    %int1_4852 = torch.constant.int 1
    %int0_4853 = torch.constant.int 0
    // %4561 holds the two values later used as pad amounts (%4566, %4567).
    // The lists %4561-%4564 themselves are not referenced within this excerpt.
    %4561 = torch.prim.ListConstruct %int0_4847, %int0_4848 : (!torch.int, !torch.int) -> !torch.list<int>
    %4562 = torch.prim.ListConstruct %int1_4849, %int1_4850 : (!torch.int, !torch.int) -> !torch.list<int>
    %4563 = torch.prim.ListConstruct %int1_4851, %int1_4852 : (!torch.int, !torch.int) -> !torch.list<int>
    %4564 = torch.prim.ListConstruct %int0_4853, %int0_4853 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4854 = torch.constant.bool false
    %int1_4855 = torch.constant.int 1
    // %4565 (= 1) is the "groups" value checked by the asserts below;
    // %4566 / %4567 (= 0) are the pad amounts for dims 2 and 3.
    %4565 = torch_c.to_i64 %int1_4855
    %4566 = torch_c.to_i64 %int0_4847
    %4567 = torch_c.to_i64 %int0_4848
    %4568 = torch_c.to_i64 %int0_4853
    %4569 = torch_c.to_i64 %int0_4853
    %c0_4856 = arith.constant 0 : index
    %c1_4857 = arith.constant 1 : index
    %c1_4858 = arith.constant 1 : index
    %c256_4859 = arith.constant 256 : index
    %c2_4860 = arith.constant 2 : index
    %c14_4861 = arith.constant 14 : index
    %c3_4862 = arith.constant 3 : index
    %c14_4863 = arith.constant 14 : index
    %c0_4864 = arith.constant 0 : index
    %c1024_4865 = arith.constant 1024 : index
    %c1_4866 = arith.constant 1 : index
    %c256_4867 = arith.constant 256 : index
    %c2_4868 = arith.constant 2 : index
    %c1_4869 = arith.constant 1 : index
    %c3_4870 = arith.constant 3 : index
    %c1_4871 = arith.constant 1 : index
    // Runtime checks that groups divides 256 (%c256_4859) and 1024
    // (%c1024_4865) with zero remainder.
    %4570 = arith.index_cast %4565 : i64 to index
    %c0_4872 = arith.constant 0 : index
    %4571 = arith.remsi %c256_4859, %4570 : index
    %4572 = arith.cmpi eq, %c0_4872, %4571 : index
    cf.assert %4572, "invalid: groups must divide input channel size evenly."
    %c0_4873 = arith.constant 0 : index
    %4573 = arith.remsi %c1024_4865, %4570 : index
    %4574 = arith.cmpi eq, %c0_4873, %4573 : index
    cf.assert %4574, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_4874/_4875 multiply (kernel - 1) and %c1_i64_4876/_4877 are the
    // divisors in the output-size arithmetic below.
    %c1_i64_4874 = arith.constant 1 : i64
    %c1_i64_4875 = arith.constant 1 : i64
    %c1_i64_4876 = arith.constant 1 : i64
    %c1_i64_4877 = arith.constant 1 : i64
    // Fill value for the padding region.
    %cst_4878 = arith.constant 0.000000e+00 : f32
    %c0_4879 = arith.constant 0 : index
    %c1_4880 = arith.constant 1 : index
    %c1_4881 = arith.constant 1 : index
    %c256_4882 = arith.constant 256 : index
    %c2_4883 = arith.constant 2 : index
    %c14_4884 = arith.constant 14 : index
    %c3_4885 = arith.constant 3 : index
    %c14_4886 = arith.constant 14 : index
    %c0_i64_4887 = arith.constant 0 : i64
    // Pad amounts per dimension: all four are 0 here.
    %4575 = arith.index_cast %c0_i64_4887 : i64 to index
    %4576 = arith.index_cast %c0_i64_4887 : i64 to index
    %4577 = arith.index_cast %4566 : i64 to index
    %4578 = arith.index_cast %4567 : i64 to index
    // tensor.pad of %cast_4820 with 0.0 as fill value; low and high amounts match.
    %padded_4888 = tensor.pad %cast_4820 low[%4575, %4576, %4577, %4578] high[%4575, %4576, %4577, %4578] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4878 : f32
    } : tensor<1x256x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(14 + 2 * 0 - 1 * (1 - 1) - 1, 1) + 1 = 14
    %4579 = arith.index_cast %c1_4869 : index to i64
    %c1_i64_4889 = arith.constant 1 : i64
    %c2_i64_4890 = arith.constant 2 : i64
    %4580 = arith.muli %4566, %c2_i64_4890 : i64
    %4581 = arith.index_cast %c14_4861 : index to i64
    %4582 = arith.addi %4581, %4580 : i64
    %4583 = arith.subi %4579, %c1_i64_4889 : i64
    %4584 = arith.muli %c1_i64_4874, %4583 : i64
    %4585 = arith.subi %4582, %4584 : i64
    %4586 = arith.subi %4585, %c1_i64_4889 : i64
    %4587 = arith.floordivsi %4586, %c1_i64_4876 : i64
    %4588 = arith.addi %4587, %c1_i64_4889 : i64
    %4589 = arith.index_cast %4588 : i64 to index
    // Output extent for dim 3, same formula and same constant values.
    %4590 = arith.index_cast %c1_4871 : index to i64
    %c1_i64_4891 = arith.constant 1 : i64
    %c2_i64_4892 = arith.constant 2 : i64
    %4591 = arith.muli %4567, %c2_i64_4892 : i64
    %4592 = arith.index_cast %c14_4863 : index to i64
    %4593 = arith.addi %4592, %4591 : i64
    %4594 = arith.subi %4590, %c1_i64_4891 : i64
    %4595 = arith.muli %c1_i64_4875, %4594 : i64
    %4596 = arith.subi %4593, %4595 : i64
    %4597 = arith.subi %4596, %c1_i64_4891 : i64
    %4598 = arith.floordivsi %4597, %c1_i64_4877 : i64
    %4599 = arith.addi %4598, %c1_i64_4891 : i64
    %4600 = arith.index_cast %4599 : i64 to index
    // Output buffer with the two computed dynamic extents.
    %4601 = tensor.empty(%4589, %4600) : tensor<1x1024x?x?xf32>
    // Fill the output with %cast_4846 indexed by dim 1 only (#map3), i.e. one
    // value per channel copied to every (n, h, w) position.
    %4602 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4846 : tensor<1024xf32>) outs(%4601 : tensor<1x1024x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x1024x?x?xf32>
    // %4603 / %4604 are not referenced again within this excerpt.
    %4603 = arith.floordivsi %c256_4859, %4570 : index
    %4604 = arith.floordivsi %c1024_4865, %4570 : index
    %c0_4893 = arith.constant 0 : index
    %c1_4894 = arith.constant 1 : index
    // Convolution accumulating into the pre-filled tensor %4602.
    %4605 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_4888, %cast_4835 : tensor<?x?x?x?xf32>, tensor<1024x256x1x1xf32>) outs(%4602 : tensor<1x1024x?x?xf32>) -> tensor<1x1024x?x?xf32>
    // Cast the dynamically shaped result to the static type 1x1024x14x14.
    %cast_4895 = tensor.cast %4605 : tensor<1x1024x?x?xf32> to tensor<1x1024x14x14xf32>
    // Quantize %cast_4895 (f32) to i8 with scale 7.8125e-03 (%4606) and zero point 0 (%4607):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4606 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4607 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4896, %4609, %4611 and the index constants below are not referenced by any op
    // visible in this section (the region re-converts %4608 / %4610 itself).
    %int12_4896 = torch.constant.int 12
    %4608 = torch.aten.item %4606 : !torch.vtensor<[],f32> -> !torch.float
    %4609 = torch_c.to_f64 %4608
    %4610 = torch.aten.item %4607 : !torch.vtensor<[],si8> -> !torch.int
    %4611 = torch_c.to_i64 %4610
    %c1_4897 = arith.constant 1 : index
    %c1_4898 = arith.constant 1 : index
    %c1024_4899 = arith.constant 1024 : index
    %c2_4900 = arith.constant 2 : index
    %c14_4901 = arith.constant 14 : index
    %c3_4902 = arith.constant 3 : index
    %c14_4903 = arith.constant 14 : index
    %4612 = tensor.empty() : tensor<1x1024x14x14xi8>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4613 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4895 : tensor<1x1024x14x14xf32>) outs(%4612 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4610
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4608
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Same-type casts: source and destination types are identical.
    %cast_4904 = tensor.cast %4613 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4905 = tensor.cast %cast_4904 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_4905 (i8) back to f32 with scale 7.8125e-03 (%4614) and zero point 0 (%4615):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    %4614 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4615 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4617, %4619 and the index constants below are not referenced by any op visible in this section.
    %4616 = torch.aten.item %4614 : !torch.vtensor<[],f32> -> !torch.float
    %4617 = torch_c.to_f64 %4616
    %4618 = torch.aten.item %4615 : !torch.vtensor<[],si8> -> !torch.int
    %4619 = torch_c.to_i64 %4618
    %cast_4906 = tensor.cast %cast_4905 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_4907 = arith.constant 1 : index
    %c1_4908 = arith.constant 1 : index
    %c1024_4909 = arith.constant 1024 : index
    %c2_4910 = arith.constant 2 : index
    %c14_4911 = arith.constant 14 : index
    %c3_4912 = arith.constant 3 : index
    %c14_4913 = arith.constant 14 : index
    %4620 = tensor.empty() : tensor<1x1024x14x14xf32>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4621 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4906 : tensor<1x1024x14x14xi8>) outs(%4620 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4618
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4616
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4914 = tensor.cast %4621 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Elementwise add of two 1x1024x14x14 f32 tensors: out = %cast_4914 + %cast_4608 * alpha,
    // where alpha is the integer constant 1 (%int1_4915). %cast_4608 is defined above this section.
    %int1_4915 = torch.constant.int 1
    %4622 = torch_c.to_i64 %int1_4915
    %c1_4916 = arith.constant 1 : index
    %c1_4917 = arith.constant 1 : index
    %c1024_4918 = arith.constant 1024 : index
    %c2_4919 = arith.constant 2 : index
    %c14_4920 = arith.constant 14 : index
    %c3_4921 = arith.constant 3 : index
    %c14_4922 = arith.constant 14 : index
    %c1_4923 = arith.constant 1 : index
    // Broadcast-compatibility asserts for dims 1, 2 and 3. Both sides of each comparison are
    // equal constants (1024 vs 1024, 14 vs 14), so the conditions are always true here.
    %c1024_4924 = arith.constant 1024 : index
    %4623 = arith.cmpi eq, %c1024_4918, %c1024_4924 : index
    cf.assert %4623, "mismatched size for broadcast"
    %c2_4925 = arith.constant 2 : index
    %c14_4926 = arith.constant 14 : index
    %4624 = arith.cmpi eq, %c14_4920, %c14_4926 : index
    cf.assert %4624, "mismatched size for broadcast"
    %c3_4927 = arith.constant 3 : index
    %c14_4928 = arith.constant 14 : index
    %4625 = arith.cmpi eq, %c14_4922, %c14_4928 : index
    cf.assert %4625, "mismatched size for broadcast"
    %4626 = tensor.empty() : tensor<1x1024x14x14xf32>
    // Both inputs are read through #map (dim 0 pinned to index 0); the output uses the identity map.
    %4627 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4914, %cast_4608 : tensor<1x1024x14x14xf32>, tensor<1x1024x14x14xf32>) outs(%4626 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // alpha (i64 1) converted to f32, then in + in_6197 * alpha.
      %5774 = arith.sitofp %4622 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4929 = tensor.cast %4627 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // ReLU-style clamp on %cast_4929: out = (x > 0.0) ? x : 0.0, elementwise over 1x1024x14x14.
    // The index constants below are not referenced by any op visible in this section.
    %c1_4930 = arith.constant 1 : index
    %c1_4931 = arith.constant 1 : index
    %c1024_4932 = arith.constant 1024 : index
    %c2_4933 = arith.constant 2 : index
    %c14_4934 = arith.constant 14 : index
    %c3_4935 = arith.constant 3 : index
    %c14_4936 = arith.constant 14 : index
    %4628 = tensor.empty() : tensor<1x1024x14x14xf32>
    %4629 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4929 : tensor<1x1024x14x14xf32>) outs(%4628 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered-or-greater-than predicate, so the select keeps %in when the
      // comparison is unordered as well as when %in > 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4937 = tensor.cast %4629 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize %cast_4937 (f32) to i8 with scale 7.8125e-03 (%4630) and zero point 0 (%4631):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4630 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4631 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4938, %4633, %4635 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_4938 = torch.constant.int 12
    %4632 = torch.aten.item %4630 : !torch.vtensor<[],f32> -> !torch.float
    %4633 = torch_c.to_f64 %4632
    %4634 = torch.aten.item %4631 : !torch.vtensor<[],si8> -> !torch.int
    %4635 = torch_c.to_i64 %4634
    %c1_4939 = arith.constant 1 : index
    %c1_4940 = arith.constant 1 : index
    %c1024_4941 = arith.constant 1024 : index
    %c2_4942 = arith.constant 2 : index
    %c14_4943 = arith.constant 14 : index
    %c3_4944 = arith.constant 3 : index
    %c14_4945 = arith.constant 14 : index
    %4636 = tensor.empty() : tensor<1x1024x14x14xi8>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4637 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4937 : tensor<1x1024x14x14xf32>) outs(%4636 : tensor<1x1024x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4634
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4632
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x1024x14x14xi8>
    // Same-type casts: source and destination types are identical.
    %cast_4946 = tensor.cast %4637 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %cast_4947 = tensor.cast %cast_4946 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    // Dequantize %cast_4947 (i8) back to f32 with scale 7.8125e-03 (%4638) and zero point 0 (%4639):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    // The result (%cast_4956) is the input that the convolution further down pads and consumes.
    %4638 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4639 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4641, %4643 and the index constants below are not referenced by any op visible in this section.
    %4640 = torch.aten.item %4638 : !torch.vtensor<[],f32> -> !torch.float
    %4641 = torch_c.to_f64 %4640
    %4642 = torch.aten.item %4639 : !torch.vtensor<[],si8> -> !torch.int
    %4643 = torch_c.to_i64 %4642
    %cast_4948 = tensor.cast %cast_4947 : tensor<1x1024x14x14xi8> to tensor<1x1024x14x14xi8>
    %c1_4949 = arith.constant 1 : index
    %c1_4950 = arith.constant 1 : index
    %c1024_4951 = arith.constant 1024 : index
    %c2_4952 = arith.constant 2 : index
    %c14_4953 = arith.constant 14 : index
    %c3_4954 = arith.constant 3 : index
    %c14_4955 = arith.constant 14 : index
    %4644 = tensor.empty() : tensor<1x1024x14x14xf32>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4645 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4948 : tensor<1x1024x14x14xi8>) outs(%4644 : tensor<1x1024x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4642
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4640
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x1024x14x14xf32>
    %cast_4956 = tensor.cast %4645 : tensor<1x1024x14x14xf32> to tensor<1x1024x14x14xf32>
    // Quantize the 512x1024x1x1 f32 tensor %174 (defined above this section; it becomes the
    // convolution filter further down after dequantization) to i8 with scale 0.001953125 (%4646)
    // and zero point 0 (%4647):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4646 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4647 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4957, %4649, %4651 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_4957 = torch.constant.int 12
    %4648 = torch.aten.item %4646 : !torch.vtensor<[],f32> -> !torch.float
    %4649 = torch_c.to_f64 %4648
    %4650 = torch.aten.item %4647 : !torch.vtensor<[],si8> -> !torch.int
    %4651 = torch_c.to_i64 %4650
    %c1_4958 = arith.constant 1 : index
    %c0_4959 = arith.constant 0 : index
    %c512_4960 = arith.constant 512 : index
    %c1_4961 = arith.constant 1 : index
    %c1024_4962 = arith.constant 1024 : index
    %4652 = tensor.empty() : tensor<512x1024x1x1xi8>
    // #map4 reads the input at (d0, d1, 0, 0): the two trailing size-1 dims are pinned to index 0.
    %4653 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%174 : tensor<512x1024x1x1xf32>) outs(%4652 : tensor<512x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4650
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4648
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x1024x1x1xi8>
    // Same-type casts: source and destination types are identical.
    %cast_4963 = tensor.cast %4653 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
    %cast_4964 = tensor.cast %cast_4963 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
    // Dequantize %cast_4964 (i8, 512x1024x1x1) back to f32 with scale 0.001953125 (%4654) and
    // zero point 0 (%4655):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    // The result (%cast_4971) is the filter operand of the linalg.conv_2d_nchw_fchw further down.
    %4654 = torch.vtensor.literal(dense<0.001953125> : tensor<f32>) : !torch.vtensor<[],f32>
    %4655 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4657, %4659 and the index constants below are not referenced by any op visible in this section.
    %4656 = torch.aten.item %4654 : !torch.vtensor<[],f32> -> !torch.float
    %4657 = torch_c.to_f64 %4656
    %4658 = torch.aten.item %4655 : !torch.vtensor<[],si8> -> !torch.int
    %4659 = torch_c.to_i64 %4658
    %cast_4965 = tensor.cast %cast_4964 : tensor<512x1024x1x1xi8> to tensor<512x1024x1x1xi8>
    %c1_4966 = arith.constant 1 : index
    %c0_4967 = arith.constant 0 : index
    %c512_4968 = arith.constant 512 : index
    %c1_4969 = arith.constant 1 : index
    %c1024_4970 = arith.constant 1024 : index
    %4660 = tensor.empty() : tensor<512x1024x1x1xf32>
    // #map4 reads the input at (d0, d1, 0, 0): the two trailing size-1 dims are pinned to index 0.
    %4661 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4965 : tensor<512x1024x1x1xi8>) outs(%4660 : tensor<512x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4658
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4656
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x1024x1x1xf32>
    %cast_4971 = tensor.cast %4661 : tensor<512x1024x1x1xf32> to tensor<512x1024x1x1xf32>
    // Quantize the 512-element f32 vector %176 (defined above this section) to i8 with
    // scale 3.90625e-03 (%4662) and zero point 0 (%4663):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4662 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4663 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_4972, %4665, %4667 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_4972 = torch.constant.int 12
    %4664 = torch.aten.item %4662 : !torch.vtensor<[],f32> -> !torch.float
    %4665 = torch_c.to_f64 %4664
    %4666 = torch.aten.item %4663 : !torch.vtensor<[],si8> -> !torch.int
    %4667 = torch_c.to_i64 %4666
    %c1_4973 = arith.constant 1 : index
    %c0_4974 = arith.constant 0 : index
    %c512_4975 = arith.constant 512 : index
    %4668 = tensor.empty() : tensor<512xi8>
    // 1-D elementwise op; #map2 is the identity map on the single dim.
    %4669 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%176 : tensor<512xf32>) outs(%4668 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4666
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4664
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Same-type casts: source and destination types are identical.
    %cast_4976 = tensor.cast %4669 : tensor<512xi8> to tensor<512xi8>
    %cast_4977 = tensor.cast %cast_4976 : tensor<512xi8> to tensor<512xi8>
    // Dequantize %cast_4977 (i8, 512 elements) back to f32 with scale 3.90625e-03 (%4670) and
    // zero point 0 (%4671):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    // The result (%cast_4982) is broadcast along dim 1 to seed the convolution output further down.
    %4670 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4671 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4673, %4675 and the index constants below are not referenced by any op visible in this section.
    %4672 = torch.aten.item %4670 : !torch.vtensor<[],f32> -> !torch.float
    %4673 = torch_c.to_f64 %4672
    %4674 = torch.aten.item %4671 : !torch.vtensor<[],si8> -> !torch.int
    %4675 = torch_c.to_i64 %4674
    %cast_4978 = tensor.cast %cast_4977 : tensor<512xi8> to tensor<512xi8>
    %c1_4979 = arith.constant 1 : index
    %c0_4980 = arith.constant 0 : index
    %c512_4981 = arith.constant 512 : index
    %4676 = tensor.empty() : tensor<512xf32>
    // 1-D elementwise op; #map2 is the identity map on the single dim.
    %4677 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_4978 : tensor<512xi8>) outs(%4676 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4674
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4672
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    %cast_4982 = tensor.cast %4677 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution lowering: input %cast_4956 (1x1024x14x14), filter %cast_4971 (512x1024x1x1),
    // accumulator seeded from %cast_4982 (512 elements), producing %cast_5031 (1x512x14x14).
    // Torch-level integer parameters. %int0_4983 / %int0_4984 feed the pad amounts and output-size
    // arithmetic below; %int1_4991 is the group count checked by the asserts.
    %int0_4983 = torch.constant.int 0
    %int0_4984 = torch.constant.int 0
    %int1_4985 = torch.constant.int 1
    %int1_4986 = torch.constant.int 1
    %int1_4987 = torch.constant.int 1
    %int1_4988 = torch.constant.int 1
    %int0_4989 = torch.constant.int 0
    // The four lists and %false_4990 are not referenced by any op visible in this section.
    %4678 = torch.prim.ListConstruct %int0_4983, %int0_4984 : (!torch.int, !torch.int) -> !torch.list<int>
    %4679 = torch.prim.ListConstruct %int1_4985, %int1_4986 : (!torch.int, !torch.int) -> !torch.list<int>
    %4680 = torch.prim.ListConstruct %int1_4987, %int1_4988 : (!torch.int, !torch.int) -> !torch.list<int>
    %4681 = torch.prim.ListConstruct %int0_4989, %int0_4989 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_4990 = torch.constant.bool false
    %int1_4991 = torch.constant.int 1
    // i64 views of the parameters: %4682 = 1 (groups), %4683 / %4684 = 0 (pad for dims 2 / 3).
    // %4685 and %4686 are not referenced by any op visible in this section.
    %4682 = torch_c.to_i64 %int1_4991
    %4683 = torch_c.to_i64 %int0_4983
    %4684 = torch_c.to_i64 %int0_4984
    %4685 = torch_c.to_i64 %int0_4989
    %4686 = torch_c.to_i64 %int0_4989
    // Dimension index / size constants for the input (1024, 14, 14) and filter (512, 1024, 1, 1).
    %c0_4992 = arith.constant 0 : index
    %c1_4993 = arith.constant 1 : index
    %c1_4994 = arith.constant 1 : index
    %c1024_4995 = arith.constant 1024 : index
    %c2_4996 = arith.constant 2 : index
    %c14_4997 = arith.constant 14 : index
    %c3_4998 = arith.constant 3 : index
    %c14_4999 = arith.constant 14 : index
    %c0_5000 = arith.constant 0 : index
    %c512_5001 = arith.constant 512 : index
    %c1_5002 = arith.constant 1 : index
    %c1024_5003 = arith.constant 1024 : index
    %c2_5004 = arith.constant 2 : index
    %c1_5005 = arith.constant 1 : index
    %c3_5006 = arith.constant 3 : index
    %c1_5007 = arith.constant 1 : index
    // Runtime checks: groups (%4687 = 1) must divide 1024 and 512 with zero remainder.
    %4687 = arith.index_cast %4682 : i64 to index
    %c0_5008 = arith.constant 0 : index
    %4688 = arith.remsi %c1024_4995, %4687 : index
    %4689 = arith.cmpi eq, %c0_5008, %4688 : index
    cf.assert %4689, "invalid: groups must divide input channel size evenly."
    %c0_5009 = arith.constant 0 : index
    %4690 = arith.remsi %c512_5001, %4687 : index
    %4691 = arith.cmpi eq, %c0_5009, %4690 : index
    cf.assert %4691, "invalid: groups must divide weight batch size evenly."
    // Four i64 ones used as multiplier/divisor operands in the output-size arithmetic below
    // (presumably dilation and stride per spatial dim — verify against the producer).
    %c1_i64_5010 = arith.constant 1 : i64
    %c1_i64_5011 = arith.constant 1 : i64
    %c1_i64_5012 = arith.constant 1 : i64
    %c1_i64_5013 = arith.constant 1 : i64
    // Padding value (0.0) and a run of index constants that no op visible in this section references.
    %cst_5014 = arith.constant 0.000000e+00 : f32
    %c0_5015 = arith.constant 0 : index
    %c1_5016 = arith.constant 1 : index
    %c1_5017 = arith.constant 1 : index
    %c1024_5018 = arith.constant 1024 : index
    %c2_5019 = arith.constant 2 : index
    %c14_5020 = arith.constant 14 : index
    %c3_5021 = arith.constant 3 : index
    %c14_5022 = arith.constant 14 : index
    // Pad the input with 0.0. Every pad amount here is 0 (dims 0/1 from %c0_i64_5023, dims 2/3
    // from %4683 / %4684), so the extents are unchanged; the result type is fully dynamic.
    %c0_i64_5023 = arith.constant 0 : i64
    %4692 = arith.index_cast %c0_i64_5023 : i64 to index
    %4693 = arith.index_cast %c0_i64_5023 : i64 to index
    %4694 = arith.index_cast %4683 : i64 to index
    %4695 = arith.index_cast %4684 : i64 to index
    %padded_5024 = tensor.pad %cast_4956 low[%4692, %4693, %4694, %4695] high[%4692, %4693, %4694, %4695] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5014 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   %4705 = floordiv(14 + 2*%4683 - %c1_i64_5010*(%c1_5005 - 1) - 1, %c1_i64_5012) + 1
    // With the constants above this evaluates to 14.
    %4696 = arith.index_cast %c1_5005 : index to i64
    %c1_i64_5025 = arith.constant 1 : i64
    %c2_i64_5026 = arith.constant 2 : i64
    %4697 = arith.muli %4683, %c2_i64_5026 : i64
    %4698 = arith.index_cast %c14_4997 : index to i64
    %4699 = arith.addi %4698, %4697 : i64
    %4700 = arith.subi %4696, %c1_i64_5025 : i64
    %4701 = arith.muli %c1_i64_5010, %4700 : i64
    %4702 = arith.subi %4699, %4701 : i64
    %4703 = arith.subi %4702, %c1_i64_5025 : i64
    %4704 = arith.floordivsi %4703, %c1_i64_5012 : i64
    %4705 = arith.addi %4704, %c1_i64_5025 : i64
    %4706 = arith.index_cast %4705 : i64 to index
    // Output extent for dim 3: same formula using %c14_4999, %4684, %c1_5007, %c1_i64_5011, %c1_i64_5013.
    %4707 = arith.index_cast %c1_5007 : index to i64
    %c1_i64_5027 = arith.constant 1 : i64
    %c2_i64_5028 = arith.constant 2 : i64
    %4708 = arith.muli %4684, %c2_i64_5028 : i64
    %4709 = arith.index_cast %c14_4999 : index to i64
    %4710 = arith.addi %4709, %4708 : i64
    %4711 = arith.subi %4707, %c1_i64_5027 : i64
    %4712 = arith.muli %c1_i64_5011, %4711 : i64
    %4713 = arith.subi %4710, %4712 : i64
    %4714 = arith.subi %4713, %c1_i64_5027 : i64
    %4715 = arith.floordivsi %4714, %c1_i64_5013 : i64
    %4716 = arith.addi %4715, %c1_i64_5027 : i64
    %4717 = arith.index_cast %4716 : i64 to index
    // Seed the convolution accumulator: broadcast the 512-element vector %cast_4982 along
    // dim 1 (#map3 reads index d1) into a 1x512x?x? tensor.
    %4718 = tensor.empty(%4706, %4717) : tensor<1x512x?x?xf32>
    %4719 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_4982 : tensor<512xf32>) outs(%4718 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // Per-group sizes and two index constants; none of these four values is used by an op visible in this section.
    %4720 = arith.floordivsi %c1024_4995, %4687 : index
    %4721 = arith.floordivsi %c512_5001, %4687 : index
    %c0_5029 = arith.constant 0 : index
    %c1_5030 = arith.constant 1 : index
    // NCHW input x FCHW filter (512x1024x1x1) convolution, stride 1 and dilation 1, accumulating
    // onto the broadcast tensor %4719; the result is then cast to the static 1x512x14x14 shape.
    %4722 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5024, %cast_4971 : tensor<?x?x?x?xf32>, tensor<512x1024x1x1xf32>) outs(%4719 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    %cast_5031 = tensor.cast %4722 : tensor<1x512x?x?xf32> to tensor<1x512x14x14xf32>
    // ReLU-style clamp on %cast_5031: out = (x > 0.0) ? x : 0.0, elementwise over 1x512x14x14.
    // The index constants below are not referenced by any op visible in this section.
    %c1_5032 = arith.constant 1 : index
    %c1_5033 = arith.constant 1 : index
    %c512_5034 = arith.constant 512 : index
    %c2_5035 = arith.constant 2 : index
    %c14_5036 = arith.constant 14 : index
    %c3_5037 = arith.constant 3 : index
    %c14_5038 = arith.constant 14 : index
    %4723 = tensor.empty() : tensor<1x512x14x14xf32>
    %4724 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5031 : tensor<1x512x14x14xf32>) outs(%4723 : tensor<1x512x14x14xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered-or-greater-than predicate, so the select keeps %in when the
      // comparison is unordered as well as when %in > 0.0.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x14x14xf32>
    %cast_5039 = tensor.cast %4724 : tensor<1x512x14x14xf32> to tensor<1x512x14x14xf32>
    // Quantize %cast_5039 (f32) to i8 with scale 7.8125e-03 (%4725) and zero point 0 (%4726):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4725 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4726 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_5040, %4728, %4730 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_5040 = torch.constant.int 12
    %4727 = torch.aten.item %4725 : !torch.vtensor<[],f32> -> !torch.float
    %4728 = torch_c.to_f64 %4727
    %4729 = torch.aten.item %4726 : !torch.vtensor<[],si8> -> !torch.int
    %4730 = torch_c.to_i64 %4729
    %c1_5041 = arith.constant 1 : index
    %c1_5042 = arith.constant 1 : index
    %c512_5043 = arith.constant 512 : index
    %c2_5044 = arith.constant 2 : index
    %c14_5045 = arith.constant 14 : index
    %c3_5046 = arith.constant 3 : index
    %c14_5047 = arith.constant 14 : index
    %4731 = tensor.empty() : tensor<1x512x14x14xi8>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4732 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5039 : tensor<1x512x14x14xf32>) outs(%4731 : tensor<1x512x14x14xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4729
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4727
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x14x14xi8>
    // Same-type casts: source and destination types are identical.
    %cast_5048 = tensor.cast %4732 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
    %cast_5049 = tensor.cast %cast_5048 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
    // Dequantize %cast_5049 (i8) back to f32 with scale 7.8125e-03 (%4733) and zero point 0 (%4734):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    %4733 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4734 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4736, %4738 and the index constants below are not referenced by any op visible in this section.
    %4735 = torch.aten.item %4733 : !torch.vtensor<[],f32> -> !torch.float
    %4736 = torch_c.to_f64 %4735
    %4737 = torch.aten.item %4734 : !torch.vtensor<[],si8> -> !torch.int
    %4738 = torch_c.to_i64 %4737
    %cast_5050 = tensor.cast %cast_5049 : tensor<1x512x14x14xi8> to tensor<1x512x14x14xi8>
    %c1_5051 = arith.constant 1 : index
    %c1_5052 = arith.constant 1 : index
    %c512_5053 = arith.constant 512 : index
    %c2_5054 = arith.constant 2 : index
    %c14_5055 = arith.constant 14 : index
    %c3_5056 = arith.constant 3 : index
    %c14_5057 = arith.constant 14 : index
    %4739 = tensor.empty() : tensor<1x512x14x14xf32>
    // Elementwise over all four dims; #map reads the input with dim 0 pinned to index 0.
    %4740 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5050 : tensor<1x512x14x14xi8>) outs(%4739 : tensor<1x512x14x14xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4737
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4735
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x14x14xf32>
    %cast_5058 = tensor.cast %4740 : tensor<1x512x14x14xf32> to tensor<1x512x14x14xf32>
    // Quantize the 512x512x3x3 f32 tensor %178 (defined above this section; presumably the filter
    // of the next convolution — its consumer is outside this view) to i8 with scale 3.90625e-03
    // (%4741) and zero point 0 (%4742):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4741 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4742 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_5059, %4744, %4746 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_5059 = torch.constant.int 12
    %4743 = torch.aten.item %4741 : !torch.vtensor<[],f32> -> !torch.float
    %4744 = torch_c.to_f64 %4743
    %4745 = torch.aten.item %4742 : !torch.vtensor<[],si8> -> !torch.int
    %4746 = torch_c.to_i64 %4745
    %c1_5060 = arith.constant 1 : index
    %c0_5061 = arith.constant 0 : index
    %c512_5062 = arith.constant 512 : index
    %c1_5063 = arith.constant 1 : index
    %c512_5064 = arith.constant 512 : index
    %c2_5065 = arith.constant 2 : index
    %c3_5066 = arith.constant 3 : index
    %c3_5067 = arith.constant 3 : index
    %c3_5068 = arith.constant 3 : index
    %4747 = tensor.empty() : tensor<512x512x3x3xi8>
    // Plain elementwise op: both input and output use the 4-D identity map (#map1).
    %4748 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%178 : tensor<512x512x3x3xf32>) outs(%4747 : tensor<512x512x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4745
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4743
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x512x3x3xi8>
    // Same-type casts: source and destination types are identical.
    %cast_5069 = tensor.cast %4748 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    %cast_5070 = tensor.cast %cast_5069 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    // Dequantize %cast_5070 (i8, 512x512x3x3) back to f32 with scale 3.90625e-03 (%4749) and
    // zero point 0 (%4750):
    //   y = f32(sext_i32(q) - i32(zero_point)) * scale
    %4749 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4750 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %4752, %4754 and the index constants below are not referenced by any op visible in this section.
    %4751 = torch.aten.item %4749 : !torch.vtensor<[],f32> -> !torch.float
    %4752 = torch_c.to_f64 %4751
    %4753 = torch.aten.item %4750 : !torch.vtensor<[],si8> -> !torch.int
    %4754 = torch_c.to_i64 %4753
    %cast_5071 = tensor.cast %cast_5070 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    %c1_5072 = arith.constant 1 : index
    %c0_5073 = arith.constant 0 : index
    %c512_5074 = arith.constant 512 : index
    %c1_5075 = arith.constant 1 : index
    %c512_5076 = arith.constant 512 : index
    %c2_5077 = arith.constant 2 : index
    %c3_5078 = arith.constant 3 : index
    %c3_5079 = arith.constant 3 : index
    %c3_5080 = arith.constant 3 : index
    %4755 = tensor.empty() : tensor<512x512x3x3xf32>
    // Plain elementwise op: both input and output use the 4-D identity map (#map1).
    %4756 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5071 : tensor<512x512x3x3xi8>) outs(%4755 : tensor<512x512x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Widen to i32 so subtracting the zero point cannot wrap in i8.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4753
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      // Multiply by the scale (truncated from f64 to f32).
      %5779 = torch_c.to_f64 %4751
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x512x3x3xf32>
    %cast_5081 = tensor.cast %4756 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
    // Quantize the 512-element f32 vector %180 (defined above this section) to i8 with
    // scale 7.8125e-03 (%4757) and zero point 0 (%4758):
    //   q = clamp(round(x / scale) + zero_point, -128, 127), then fptosi to i8.
    // NOTE(review): math.round rounds ties away from zero (see MLIR math dialect docs). If this
    // originates from ONNX QuantizeLinear, which specifies round-half-to-even, confirm whether
    // math.roundeven is the intended op.
    %4757 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4758 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %int12_5082, %4760, %4762 and the index constants below are not referenced by any op
    // visible in this section.
    %int12_5082 = torch.constant.int 12
    %4759 = torch.aten.item %4757 : !torch.vtensor<[],f32> -> !torch.float
    %4760 = torch_c.to_f64 %4759
    %4761 = torch.aten.item %4758 : !torch.vtensor<[],si8> -> !torch.int
    %4762 = torch_c.to_i64 %4761
    %c1_5083 = arith.constant 1 : index
    %c0_5084 = arith.constant 0 : index
    %c512_5085 = arith.constant 512 : index
    %4763 = tensor.empty() : tensor<512xi8>
    // 1-D elementwise op; #map2 is the identity map on the single dim.
    %4764 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%180 : tensor<512xf32>) outs(%4763 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Zero point as f32 and scale truncated from f64 to f32.
      %5774 = torch_c.to_i64 %4761
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4759
      %5777 = arith.truncf %5776 : f64 to f32
      // round(x / scale) + zero_point
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to [-128, 127] before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Same-type casts: source and destination types are identical.
    %cast_5086 = tensor.cast %4764 : tensor<512xi8> to tensor<512xi8>
    %cast_5087 = tensor.cast %cast_5086 : tensor<512xi8> to tensor<512xi8>
    %4765 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4766 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4767 = torch.aten.item %4765 : !torch.vtensor<[],f32> -> !torch.float
    %4768 = torch_c.to_f64 %4767
    %4769 = torch.aten.item %4766 : !torch.vtensor<[],si8> -> !torch.int
    %4770 = torch_c.to_i64 %4769
    %cast_5088 = tensor.cast %cast_5087 : tensor<512xi8> to tensor<512xi8>
    %c1_5089 = arith.constant 1 : index
    %c0_5090 = arith.constant 0 : index
    %c512_5091 = arith.constant 512 : index
    %4771 = tensor.empty() : tensor<512xf32>
    %4772 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5088 : tensor<512xi8>) outs(%4771 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4769
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4767
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    %cast_5092 = tensor.cast %4772 : tensor<512xf32> to tensor<512xf32>
    // Lowered 2-D convolution (NCHW input, FCHW weights):
    //   input   %cast_5058 : 1x512x14x14 f32 (defined outside this excerpt)
    //   weights %cast_5081 : 512x512x3x3 f32
    //   bias    %cast_5092 : 512 f32
    // with stride 2, dilation 1 and a spatial padding of 1 on each side.
    // The result is cast to the static shape 1x512x7x7 (%cast_5141).
    %int1_5093 = torch.constant.int 1
    %int1_5094 = torch.constant.int 1
    %int1_5095 = torch.constant.int 1
    %int1_5096 = torch.constant.int 1
    %int2_5097 = torch.constant.int 2
    %int2_5098 = torch.constant.int 2
    %int0_5099 = torch.constant.int 0
    // %4773 = [1, 1]: its elements are the spatial padding amounts used by tensor.pad below.
    // NOTE(review): %4774 = [1, 1], %4775 = [2, 2] and %4776 = [0, 0] are presumably
    // dilation, stride and output padding; the lists themselves are not used in this excerpt.
    %4773 = torch.prim.ListConstruct %int1_5093, %int1_5094 : (!torch.int, !torch.int) -> !torch.list<int>
    %4774 = torch.prim.ListConstruct %int1_5095, %int1_5096 : (!torch.int, !torch.int) -> !torch.list<int>
    %4775 = torch.prim.ListConstruct %int2_5097, %int2_5098 : (!torch.int, !torch.int) -> !torch.list<int>
    %4776 = torch.prim.ListConstruct %int0_5099, %int0_5099 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5100 = torch.constant.bool false
    %int1_5101 = torch.constant.int 1
    // %4777 is the group count (1); %4778 / %4779 are the H / W padding (1 each).
    %4777 = torch_c.to_i64 %int1_5101
    %4778 = torch_c.to_i64 %int1_5093
    %4779 = torch_c.to_i64 %int1_5094
    %4780 = torch_c.to_i64 %int0_5099
    %4781 = torch_c.to_i64 %int0_5099
    %c0_5102 = arith.constant 0 : index
    %c1_5103 = arith.constant 1 : index
    %c1_5104 = arith.constant 1 : index
    %c512_5105 = arith.constant 512 : index
    %c2_5106 = arith.constant 2 : index
    %c14_5107 = arith.constant 14 : index
    %c3_5108 = arith.constant 3 : index
    %c14_5109 = arith.constant 14 : index
    %c0_5110 = arith.constant 0 : index
    %c512_5111 = arith.constant 512 : index
    %c1_5112 = arith.constant 1 : index
    %c512_5113 = arith.constant 512 : index
    %c2_5114 = arith.constant 2 : index
    %c3_5115 = arith.constant 3 : index
    %c3_5116 = arith.constant 3 : index
    %c3_5117 = arith.constant 3 : index
    // Runtime checks that the group count divides both channel sizes (512 % 1 == 0).
    %4782 = arith.index_cast %4777 : i64 to index
    %c0_5118 = arith.constant 0 : index
    %4783 = arith.remsi %c512_5105, %4782 : index
    %4784 = arith.cmpi eq, %c0_5118, %4783 : index
    cf.assert %4784, "invalid: groups must divide input channel size evenly."
    %c0_5119 = arith.constant 0 : index
    %4785 = arith.remsi %c512_5111, %4782 : index
    %4786 = arith.cmpi eq, %c0_5119, %4785 : index
    cf.assert %4786, "invalid: groups must divide weight batch size evenly."
    // The i64 constants 1 multiply (kernel - 1) and the i64 constants 2 are the
    // divisors in the output-size computation below; they match the conv's
    // dilations = 1 and strides = 2 attributes.
    %c1_i64_5120 = arith.constant 1 : i64
    %c1_i64_5121 = arith.constant 1 : i64
    %c2_i64_5122 = arith.constant 2 : i64
    %c2_i64_5123 = arith.constant 2 : i64
    // Padding value for tensor.pad.
    %cst_5124 = arith.constant 0.000000e+00 : f32
    %c0_5125 = arith.constant 0 : index
    %c1_5126 = arith.constant 1 : index
    %c1_5127 = arith.constant 1 : index
    %c512_5128 = arith.constant 512 : index
    %c2_5129 = arith.constant 2 : index
    %c14_5130 = arith.constant 14 : index
    %c3_5131 = arith.constant 3 : index
    %c14_5132 = arith.constant 14 : index
    %c0_i64_5133 = arith.constant 0 : i64
    // Pad amounts per dimension: 0 for batch and channel, 1 for height and width,
    // applied identically on the low and high side.
    %4787 = arith.index_cast %c0_i64_5133 : i64 to index
    %4788 = arith.index_cast %c0_i64_5133 : i64 to index
    %4789 = arith.index_cast %4778 : i64 to index
    %4790 = arith.index_cast %4779 : i64 to index
    %padded_5134 = tensor.pad %cast_5058 low[%4787, %4788, %4789, %4790] high[%4787, %4788, %4789, %4790] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5124 : f32
    } : tensor<1x512x14x14xf32> to tensor<?x?x?x?xf32>
    // Output height: floor((in + 2*pad - 1*(kernel - 1) - 1) / 2) + 1
    //              = floor((14 + 2 - 2 - 1) / 2) + 1 = 7.
    %4791 = arith.index_cast %c3_5115 : index to i64
    %c1_i64_5135 = arith.constant 1 : i64
    %c2_i64_5136 = arith.constant 2 : i64
    %4792 = arith.muli %4778, %c2_i64_5136 : i64
    %4793 = arith.index_cast %c14_5107 : index to i64
    %4794 = arith.addi %4793, %4792 : i64
    %4795 = arith.subi %4791, %c1_i64_5135 : i64
    %4796 = arith.muli %c1_i64_5120, %4795 : i64
    %4797 = arith.subi %4794, %4796 : i64
    %4798 = arith.subi %4797, %c1_i64_5135 : i64
    %4799 = arith.floordivsi %4798, %c2_i64_5122 : i64
    %4800 = arith.addi %4799, %c1_i64_5135 : i64
    %4801 = arith.index_cast %4800 : i64 to index
    // Output width: same computation on the last dimension (also 7).
    %4802 = arith.index_cast %c3_5117 : index to i64
    %c1_i64_5137 = arith.constant 1 : i64
    %c2_i64_5138 = arith.constant 2 : i64
    %4803 = arith.muli %4779, %c2_i64_5138 : i64
    %4804 = arith.index_cast %c14_5109 : index to i64
    %4805 = arith.addi %4804, %4803 : i64
    %4806 = arith.subi %4802, %c1_i64_5137 : i64
    %4807 = arith.muli %c1_i64_5121, %4806 : i64
    %4808 = arith.subi %4805, %4807 : i64
    %4809 = arith.subi %4808, %c1_i64_5137 : i64
    %4810 = arith.floordivsi %4809, %c2_i64_5123 : i64
    %4811 = arith.addi %4810, %c1_i64_5137 : i64
    %4812 = arith.index_cast %4811 : i64 to index
    // Accumulator initialised with the bias: #map3 selects d1, so every output
    // element of channel d1 starts at bias[d1].
    %4813 = tensor.empty(%4801, %4812) : tensor<1x512x?x?xf32>
    %4814 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5092 : tensor<512xf32>) outs(%4813 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // Channels per group (512 / 1); neither result is used in this excerpt.
    %4815 = arith.floordivsi %c512_5105, %4782 : index
    %4816 = arith.floordivsi %c512_5111, %4782 : index
    %c0_5139 = arith.constant 0 : index
    %c1_5140 = arith.constant 1 : index
    // The convolution accumulates into the bias-initialised tensor %4814.
    %4817 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_5134, %cast_5081 : tensor<?x?x?x?xf32>, tensor<512x512x3x3xf32>) outs(%4814 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Recover the static result shape.
    %cast_5141 = tensor.cast %4817 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
    // ReLU over the 1x512x7x7 convolution result %cast_5141: each element is
    // replaced by itself when it compares greater than 0.0 and by 0.0 otherwise.
    // The index constants below are not used in this excerpt.
    %c1_5142 = arith.constant 1 : index
    %c1_5143 = arith.constant 1 : index
    %c512_5144 = arith.constant 512 : index
    %c2_5145 = arith.constant 2 : index
    %c7_5146 = arith.constant 7 : index
    %c3_5147 = arith.constant 3 : index
    %c7_5148 = arith.constant 7 : index
    %4818 = tensor.empty() : tensor<1x512x7x7xf32>
    %4819 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5141 : tensor<1x512x7x7xf32>) outs(%4818 : tensor<1x512x7x7xf32>) {
    ^bb0(%x: f32, %init: f32):
      %relu_floor = arith.constant 0.000000e+00 : f32
      // `ugt` is the unordered predicate: it also holds when %x is NaN, so a
      // NaN input is passed through unchanged.
      %is_above_floor = arith.cmpf ugt, %x, %relu_floor : f32
      %rectified = arith.select %is_above_floor, %x, %relu_floor : f32
      linalg.yield %rectified : f32
    } -> tensor<1x512x7x7xf32>
    // Same-type cast (a no-op) naming the activation consumed by the next stage.
    %cast_5149 = tensor.cast %4819 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 1x512x7x7 ReLU output %cast_5149. Both halves use scale 1.5625e-02 (2^-6)
    // and zero point 0. The dequantized activation is %cast_5168.
    %4820 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4821 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5150 = torch.constant.int 12
    %4822 = torch.aten.item %4820 : !torch.vtensor<[],f32> -> !torch.float
    %4823 = torch_c.to_f64 %4822
    %4824 = torch.aten.item %4821 : !torch.vtensor<[],si8> -> !torch.int
    %4825 = torch_c.to_i64 %4824
    %c1_5151 = arith.constant 1 : index
    %c1_5152 = arith.constant 1 : index
    %c512_5153 = arith.constant 512 : index
    %c2_5154 = arith.constant 2 : index
    %c7_5155 = arith.constant 7 : index
    %c3_5156 = arith.constant 3 : index
    %c7_5157 = arith.constant 7 : index
    %4826 = tensor.empty() : tensor<1x512x7x7xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4827 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5149 : tensor<1x512x7x7xf32>) outs(%4826 : tensor<1x512x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4824
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4822
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x7x7xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5158 = tensor.cast %4827 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %cast_5159 = tensor.cast %cast_5158 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4828 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4829 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4830 = torch.aten.item %4828 : !torch.vtensor<[],f32> -> !torch.float
    %4831 = torch_c.to_f64 %4830
    %4832 = torch.aten.item %4829 : !torch.vtensor<[],si8> -> !torch.int
    %4833 = torch_c.to_i64 %4832
    %cast_5160 = tensor.cast %cast_5159 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %c1_5161 = arith.constant 1 : index
    %c1_5162 = arith.constant 1 : index
    %c512_5163 = arith.constant 512 : index
    %c2_5164 = arith.constant 2 : index
    %c7_5165 = arith.constant 7 : index
    %c3_5166 = arith.constant 3 : index
    %c7_5167 = arith.constant 7 : index
    %4834 = tensor.empty() : tensor<1x512x7x7xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4835 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5160 : tensor<1x512x7x7xi8>) outs(%4834 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4832
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4830
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x7x7xf32>
    %cast_5168 = tensor.cast %4835 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 2048x512x1x1 f32 tensor %182, which is defined outside this excerpt
    // (presumably 1x1 convolution weights -- confirm at its definition). Both
    // halves use scale 3.90625e-03 (2^-8) and zero point 0. The dequantized
    // result is %cast_5182.
    %4836 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4837 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5169 = torch.constant.int 12
    %4838 = torch.aten.item %4836 : !torch.vtensor<[],f32> -> !torch.float
    %4839 = torch_c.to_f64 %4838
    %4840 = torch.aten.item %4837 : !torch.vtensor<[],si8> -> !torch.int
    %4841 = torch_c.to_i64 %4840
    %c1_5170 = arith.constant 1 : index
    %c0_5171 = arith.constant 0 : index
    %c2048 = arith.constant 2048 : index
    %c1_5172 = arith.constant 1 : index
    %c512_5173 = arith.constant 512 : index
    %4842 = tensor.empty() : tensor<2048x512x1x1xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4843 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%182 : tensor<2048x512x1x1xf32>) outs(%4842 : tensor<2048x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4840
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4838
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048x512x1x1xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5174 = tensor.cast %4843 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    %cast_5175 = tensor.cast %cast_5174 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4844 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4845 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4846 = torch.aten.item %4844 : !torch.vtensor<[],f32> -> !torch.float
    %4847 = torch_c.to_f64 %4846
    %4848 = torch.aten.item %4845 : !torch.vtensor<[],si8> -> !torch.int
    %4849 = torch_c.to_i64 %4848
    %cast_5176 = tensor.cast %cast_5175 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    %c1_5177 = arith.constant 1 : index
    %c0_5178 = arith.constant 0 : index
    %c2048_5179 = arith.constant 2048 : index
    %c1_5180 = arith.constant 1 : index
    %c512_5181 = arith.constant 512 : index
    %4850 = tensor.empty() : tensor<2048x512x1x1xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4851 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5176 : tensor<2048x512x1x1xi8>) outs(%4850 : tensor<2048x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4848
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4846
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048x512x1x1xf32>
    %cast_5182 = tensor.cast %4851 : tensor<2048x512x1x1xf32> to tensor<2048x512x1x1xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 2048-element f32 tensor %184, which is defined outside this excerpt
    // (presumably a convolution bias -- confirm at its definition). Both halves
    // use scale 1.5625e-02 (2^-6) and zero point 0. The dequantized result is
    // %cast_5193.
    %4852 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4853 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5183 = torch.constant.int 12
    %4854 = torch.aten.item %4852 : !torch.vtensor<[],f32> -> !torch.float
    %4855 = torch_c.to_f64 %4854
    %4856 = torch.aten.item %4853 : !torch.vtensor<[],si8> -> !torch.int
    %4857 = torch_c.to_i64 %4856
    %c1_5184 = arith.constant 1 : index
    %c0_5185 = arith.constant 0 : index
    %c2048_5186 = arith.constant 2048 : index
    %4858 = tensor.empty() : tensor<2048xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4859 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%184 : tensor<2048xf32>) outs(%4858 : tensor<2048xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4856
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4854
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5187 = tensor.cast %4859 : tensor<2048xi8> to tensor<2048xi8>
    %cast_5188 = tensor.cast %cast_5187 : tensor<2048xi8> to tensor<2048xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4860 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4861 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4862 = torch.aten.item %4860 : !torch.vtensor<[],f32> -> !torch.float
    %4863 = torch_c.to_f64 %4862
    %4864 = torch.aten.item %4861 : !torch.vtensor<[],si8> -> !torch.int
    %4865 = torch_c.to_i64 %4864
    %cast_5189 = tensor.cast %cast_5188 : tensor<2048xi8> to tensor<2048xi8>
    %c1_5190 = arith.constant 1 : index
    %c0_5191 = arith.constant 0 : index
    %c2048_5192 = arith.constant 2048 : index
    %4866 = tensor.empty() : tensor<2048xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4867 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5189 : tensor<2048xi8>) outs(%4866 : tensor<2048xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4864
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4862
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048xf32>
    %cast_5193 = tensor.cast %4867 : tensor<2048xf32> to tensor<2048xf32>
    // Lowered 2-D convolution (NCHW input, FCHW weights):
    //   input   %cast_5168 : 1x512x7x7 f32
    //   weights %cast_5182 : 2048x512x1x1 f32
    //   bias    %cast_5193 : 2048 f32
    // with stride 1, dilation 1 and no spatial padding.
    // The result is cast to the static shape 1x2048x7x7 (%cast_5242).
    %int0_5194 = torch.constant.int 0
    %int0_5195 = torch.constant.int 0
    %int1_5196 = torch.constant.int 1
    %int1_5197 = torch.constant.int 1
    %int1_5198 = torch.constant.int 1
    %int1_5199 = torch.constant.int 1
    %int0_5200 = torch.constant.int 0
    // %4868 = [0, 0]: its elements are the spatial padding amounts used by tensor.pad below.
    // NOTE(review): %4869 = [1, 1], %4870 = [1, 1] and %4871 = [0, 0] are presumably
    // dilation, stride and output padding; the lists themselves are not used in this excerpt.
    %4868 = torch.prim.ListConstruct %int0_5194, %int0_5195 : (!torch.int, !torch.int) -> !torch.list<int>
    %4869 = torch.prim.ListConstruct %int1_5196, %int1_5197 : (!torch.int, !torch.int) -> !torch.list<int>
    %4870 = torch.prim.ListConstruct %int1_5198, %int1_5199 : (!torch.int, !torch.int) -> !torch.list<int>
    %4871 = torch.prim.ListConstruct %int0_5200, %int0_5200 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5201 = torch.constant.bool false
    %int1_5202 = torch.constant.int 1
    // %4872 is the group count (1); %4873 / %4874 are the H / W padding (0 each).
    %4872 = torch_c.to_i64 %int1_5202
    %4873 = torch_c.to_i64 %int0_5194
    %4874 = torch_c.to_i64 %int0_5195
    %4875 = torch_c.to_i64 %int0_5200
    %4876 = torch_c.to_i64 %int0_5200
    %c0_5203 = arith.constant 0 : index
    %c1_5204 = arith.constant 1 : index
    %c1_5205 = arith.constant 1 : index
    %c512_5206 = arith.constant 512 : index
    %c2_5207 = arith.constant 2 : index
    %c7_5208 = arith.constant 7 : index
    %c3_5209 = arith.constant 3 : index
    %c7_5210 = arith.constant 7 : index
    %c0_5211 = arith.constant 0 : index
    %c2048_5212 = arith.constant 2048 : index
    %c1_5213 = arith.constant 1 : index
    %c512_5214 = arith.constant 512 : index
    %c2_5215 = arith.constant 2 : index
    %c1_5216 = arith.constant 1 : index
    %c3_5217 = arith.constant 3 : index
    %c1_5218 = arith.constant 1 : index
    // Runtime checks that the group count divides the input channels (512) and
    // the weight batch size (2048).
    %4877 = arith.index_cast %4872 : i64 to index
    %c0_5219 = arith.constant 0 : index
    %4878 = arith.remsi %c512_5206, %4877 : index
    %4879 = arith.cmpi eq, %c0_5219, %4878 : index
    cf.assert %4879, "invalid: groups must divide input channel size evenly."
    %c0_5220 = arith.constant 0 : index
    %4880 = arith.remsi %c2048_5212, %4877 : index
    %4881 = arith.cmpi eq, %c0_5220, %4880 : index
    cf.assert %4881, "invalid: groups must divide weight batch size evenly."
    // %c1_i64_5221 / %c1_i64_5222 multiply (kernel - 1) and %c1_i64_5223 /
    // %c1_i64_5224 are the divisors in the output-size computation below; they
    // match the conv's dilations = 1 and strides = 1 attributes.
    %c1_i64_5221 = arith.constant 1 : i64
    %c1_i64_5222 = arith.constant 1 : i64
    %c1_i64_5223 = arith.constant 1 : i64
    %c1_i64_5224 = arith.constant 1 : i64
    // Padding value for tensor.pad.
    %cst_5225 = arith.constant 0.000000e+00 : f32
    %c0_5226 = arith.constant 0 : index
    %c1_5227 = arith.constant 1 : index
    %c1_5228 = arith.constant 1 : index
    %c512_5229 = arith.constant 512 : index
    %c2_5230 = arith.constant 2 : index
    %c7_5231 = arith.constant 7 : index
    %c3_5232 = arith.constant 3 : index
    %c7_5233 = arith.constant 7 : index
    %c0_i64_5234 = arith.constant 0 : i64
    // All pad amounts are 0 here, so the pad adds no elements; it still changes
    // the static type to a fully dynamic one.
    %4882 = arith.index_cast %c0_i64_5234 : i64 to index
    %4883 = arith.index_cast %c0_i64_5234 : i64 to index
    %4884 = arith.index_cast %4873 : i64 to index
    %4885 = arith.index_cast %4874 : i64 to index
    %padded_5235 = tensor.pad %cast_5168 low[%4882, %4883, %4884, %4885] high[%4882, %4883, %4884, %4885] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5225 : f32
    } : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
    // Output height: floor((in + 2*pad - 1*(kernel - 1) - 1) / 1) + 1
    //              = floor((7 + 0 - 0 - 1) / 1) + 1 = 7.
    %4886 = arith.index_cast %c1_5216 : index to i64
    %c1_i64_5236 = arith.constant 1 : i64
    %c2_i64_5237 = arith.constant 2 : i64
    %4887 = arith.muli %4873, %c2_i64_5237 : i64
    %4888 = arith.index_cast %c7_5208 : index to i64
    %4889 = arith.addi %4888, %4887 : i64
    %4890 = arith.subi %4886, %c1_i64_5236 : i64
    %4891 = arith.muli %c1_i64_5221, %4890 : i64
    %4892 = arith.subi %4889, %4891 : i64
    %4893 = arith.subi %4892, %c1_i64_5236 : i64
    %4894 = arith.floordivsi %4893, %c1_i64_5223 : i64
    %4895 = arith.addi %4894, %c1_i64_5236 : i64
    %4896 = arith.index_cast %4895 : i64 to index
    // Output width: same computation on the last dimension (also 7).
    %4897 = arith.index_cast %c1_5218 : index to i64
    %c1_i64_5238 = arith.constant 1 : i64
    %c2_i64_5239 = arith.constant 2 : i64
    %4898 = arith.muli %4874, %c2_i64_5239 : i64
    %4899 = arith.index_cast %c7_5210 : index to i64
    %4900 = arith.addi %4899, %4898 : i64
    %4901 = arith.subi %4897, %c1_i64_5238 : i64
    %4902 = arith.muli %c1_i64_5222, %4901 : i64
    %4903 = arith.subi %4900, %4902 : i64
    %4904 = arith.subi %4903, %c1_i64_5238 : i64
    %4905 = arith.floordivsi %4904, %c1_i64_5224 : i64
    %4906 = arith.addi %4905, %c1_i64_5238 : i64
    %4907 = arith.index_cast %4906 : i64 to index
    // Accumulator initialised with the bias: #map3 selects d1, so every output
    // element of channel d1 starts at bias[d1].
    %4908 = tensor.empty(%4896, %4907) : tensor<1x2048x?x?xf32>
    %4909 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5193 : tensor<2048xf32>) outs(%4908 : tensor<1x2048x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x2048x?x?xf32>
    // Channels per group (512 / 1 and 2048 / 1); neither result is used in this excerpt.
    %4910 = arith.floordivsi %c512_5206, %4877 : index
    %4911 = arith.floordivsi %c2048_5212, %4877 : index
    %c0_5240 = arith.constant 0 : index
    %c1_5241 = arith.constant 1 : index
    // The convolution accumulates into the bias-initialised tensor %4909.
    %4912 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5235, %cast_5182 : tensor<?x?x?x?xf32>, tensor<2048x512x1x1xf32>) outs(%4909 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
    // Recover the static result shape.
    %cast_5242 = tensor.cast %4912 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 1x2048x7x7 convolution result %cast_5242. Both halves use scale
    // 1.5625e-02 (2^-6) and zero point 0. The dequantized activation is
    // %cast_5261.
    %4913 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4914 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5243 = torch.constant.int 12
    %4915 = torch.aten.item %4913 : !torch.vtensor<[],f32> -> !torch.float
    %4916 = torch_c.to_f64 %4915
    %4917 = torch.aten.item %4914 : !torch.vtensor<[],si8> -> !torch.int
    %4918 = torch_c.to_i64 %4917
    %c1_5244 = arith.constant 1 : index
    %c1_5245 = arith.constant 1 : index
    %c2048_5246 = arith.constant 2048 : index
    %c2_5247 = arith.constant 2 : index
    %c7_5248 = arith.constant 7 : index
    %c3_5249 = arith.constant 3 : index
    %c7_5250 = arith.constant 7 : index
    %4919 = tensor.empty() : tensor<1x2048x7x7xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4920 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5242 : tensor<1x2048x7x7xf32>) outs(%4919 : tensor<1x2048x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4917
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4915
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x2048x7x7xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5251 = tensor.cast %4920 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %cast_5252 = tensor.cast %cast_5251 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4921 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4922 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4923 = torch.aten.item %4921 : !torch.vtensor<[],f32> -> !torch.float
    %4924 = torch_c.to_f64 %4923
    %4925 = torch.aten.item %4922 : !torch.vtensor<[],si8> -> !torch.int
    %4926 = torch_c.to_i64 %4925
    %cast_5253 = tensor.cast %cast_5252 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %c1_5254 = arith.constant 1 : index
    %c1_5255 = arith.constant 1 : index
    %c2048_5256 = arith.constant 2048 : index
    %c2_5257 = arith.constant 2 : index
    %c7_5258 = arith.constant 7 : index
    %c3_5259 = arith.constant 3 : index
    %c7_5260 = arith.constant 7 : index
    %4927 = tensor.empty() : tensor<1x2048x7x7xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4928 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5253 : tensor<1x2048x7x7xi8>) outs(%4927 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4925
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4923
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5261 = tensor.cast %4928 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 2048x1024x1x1 f32 tensor %186, which is defined outside this excerpt
    // (presumably 1x1 convolution weights -- confirm at its definition). Both
    // halves use scale 1.5625e-02 (2^-6) and zero point 0. The dequantized
    // result is %cast_5276.
    %4929 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4930 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5262 = torch.constant.int 12
    %4931 = torch.aten.item %4929 : !torch.vtensor<[],f32> -> !torch.float
    %4932 = torch_c.to_f64 %4931
    %4933 = torch.aten.item %4930 : !torch.vtensor<[],si8> -> !torch.int
    %4934 = torch_c.to_i64 %4933
    %c1_5263 = arith.constant 1 : index
    %c0_5264 = arith.constant 0 : index
    %c2048_5265 = arith.constant 2048 : index
    %c1_5266 = arith.constant 1 : index
    %c1024_5267 = arith.constant 1024 : index
    %4935 = tensor.empty() : tensor<2048x1024x1x1xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4936 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%186 : tensor<2048x1024x1x1xf32>) outs(%4935 : tensor<2048x1024x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4933
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4931
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048x1024x1x1xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5268 = tensor.cast %4936 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
    %cast_5269 = tensor.cast %cast_5268 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4937 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4938 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4939 = torch.aten.item %4937 : !torch.vtensor<[],f32> -> !torch.float
    %4940 = torch_c.to_f64 %4939
    %4941 = torch.aten.item %4938 : !torch.vtensor<[],si8> -> !torch.int
    %4942 = torch_c.to_i64 %4941
    %cast_5270 = tensor.cast %cast_5269 : tensor<2048x1024x1x1xi8> to tensor<2048x1024x1x1xi8>
    %c1_5271 = arith.constant 1 : index
    %c0_5272 = arith.constant 0 : index
    %c2048_5273 = arith.constant 2048 : index
    %c1_5274 = arith.constant 1 : index
    %c1024_5275 = arith.constant 1024 : index
    %4943 = tensor.empty() : tensor<2048x1024x1x1xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4944 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5270 : tensor<2048x1024x1x1xi8>) outs(%4943 : tensor<2048x1024x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4941
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4939
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048x1024x1x1xf32>
    %cast_5276 = tensor.cast %4944 : tensor<2048x1024x1x1xf32> to tensor<2048x1024x1x1xf32>
    // Fake quantization (quantize to i8, then dequantize back to f32) of the
    // 2048-element f32 tensor %188, which is defined outside this excerpt
    // (presumably a convolution bias -- confirm at its definition). Both halves
    // use scale 7.8125e-03 (2^-7) and zero point 0. The dequantized result is
    // %cast_5287.
    %4945 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4946 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): 12 is presumably the torch dtype code of the quantized type; it is not used in this excerpt.
    %int12_5277 = torch.constant.int 12
    %4947 = torch.aten.item %4945 : !torch.vtensor<[],f32> -> !torch.float
    %4948 = torch_c.to_f64 %4947
    %4949 = torch.aten.item %4946 : !torch.vtensor<[],si8> -> !torch.int
    %4950 = torch_c.to_i64 %4949
    %c1_5278 = arith.constant 1 : index
    %c0_5279 = arith.constant 0 : index
    %c2048_5280 = arith.constant 2048 : index
    %4951 = tensor.empty() : tensor<2048xi8>
    // Quantize: q = clamp(roundeven(in / scale) + zero_point, -128, 127) as i8.
    %4952 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%188 : tensor<2048xf32>) outs(%4951 : tensor<2048xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %4949
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %4947
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Ties round to even, as ONNX QuantizeLinear specifies; math.round would
      // round ties away from zero and can differ by one quantization step.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048xi8>
    // Same-type casts below are no-ops left over from the lowering.
    %cast_5281 = tensor.cast %4952 : tensor<2048xi8> to tensor<2048xi8>
    %cast_5282 = tensor.cast %cast_5281 : tensor<2048xi8> to tensor<2048xi8>
    // Dequantize parameters (same scale and zero point as the quantize half).
    %4953 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %4954 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %4955 = torch.aten.item %4953 : !torch.vtensor<[],f32> -> !torch.float
    %4956 = torch_c.to_f64 %4955
    %4957 = torch.aten.item %4954 : !torch.vtensor<[],si8> -> !torch.int
    %4958 = torch_c.to_i64 %4957
    %cast_5283 = tensor.cast %cast_5282 : tensor<2048xi8> to tensor<2048xi8>
    %c1_5284 = arith.constant 1 : index
    %c0_5285 = arith.constant 0 : index
    %c2048_5286 = arith.constant 2048 : index
    %4959 = tensor.empty() : tensor<2048xf32>
    // Dequantize: out = (sext(q) - zero_point) * scale.
    %4960 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5283 : tensor<2048xi8>) outs(%4959 : tensor<2048xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %4957
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %4955
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048xf32>
    %cast_5287 = tensor.cast %4960 : tensor<2048xf32> to tensor<2048xf32>
    // 2-D convolution (NCHW input, FCHW filter), stride 2, dilation 1:
    //   input  : %cast_4956  (1x1024x14x14, defined above this chunk)
    //   filter : %cast_5276  (2048x1024x1x1, defined above this chunk)
    //   bias   : %cast_5287  (2048)
    //   result : %cast_5336  (1x2048x7x7)
    // The integer lists built here hold [0,0], [1,1], [2,2] and [0,0]; the conv
    // op below carries dilations = 1 and strides = 2, and the pad widths come
    // from %int0_5288 / %int0_5289.
    %int0_5288 = torch.constant.int 0
    %int0_5289 = torch.constant.int 0
    %int1_5290 = torch.constant.int 1
    %int1_5291 = torch.constant.int 1
    %int2_5292 = torch.constant.int 2
    %int2_5293 = torch.constant.int 2
    %int0_5294 = torch.constant.int 0
    %4961 = torch.prim.ListConstruct %int0_5288, %int0_5289 : (!torch.int, !torch.int) -> !torch.list<int>
    %4962 = torch.prim.ListConstruct %int1_5290, %int1_5291 : (!torch.int, !torch.int) -> !torch.list<int>
    %4963 = torch.prim.ListConstruct %int2_5292, %int2_5293 : (!torch.int, !torch.int) -> !torch.list<int>
    %4964 = torch.prim.ListConstruct %int0_5294, %int0_5294 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5295 = torch.constant.bool false
    %int1_5296 = torch.constant.int 1
    // %4965 (= 1) is the group count checked by the asserts below;
    // %4966 / %4967 (= 0) are the spatial pad widths.
    %4965 = torch_c.to_i64 %int1_5296
    %4966 = torch_c.to_i64 %int0_5288
    %4967 = torch_c.to_i64 %int0_5289
    %4968 = torch_c.to_i64 %int0_5294
    %4969 = torch_c.to_i64 %int0_5294
    %c0_5297 = arith.constant 0 : index
    %c1_5298 = arith.constant 1 : index
    %c1_5299 = arith.constant 1 : index
    %c1024_5300 = arith.constant 1024 : index
    %c2_5301 = arith.constant 2 : index
    %c14_5302 = arith.constant 14 : index
    %c3_5303 = arith.constant 3 : index
    %c14_5304 = arith.constant 14 : index
    %c0_5305 = arith.constant 0 : index
    %c2048_5306 = arith.constant 2048 : index
    %c1_5307 = arith.constant 1 : index
    %c1024_5308 = arith.constant 1024 : index
    %c2_5309 = arith.constant 2 : index
    %c1_5310 = arith.constant 1 : index
    %c3_5311 = arith.constant 3 : index
    %c1_5312 = arith.constant 1 : index
    // Runtime checks: 1024 rem groups == 0 and 2048 rem groups == 0.
    %4970 = arith.index_cast %4965 : i64 to index
    %c0_5313 = arith.constant 0 : index
    %4971 = arith.remsi %c1024_5300, %4970 : index
    %4972 = arith.cmpi eq, %c0_5313, %4971 : index
    cf.assert %4972, "invalid: groups must divide input channel size evenly."
    %c0_5314 = arith.constant 0 : index
    %4973 = arith.remsi %c2048_5306, %4970 : index
    %4974 = arith.cmpi eq, %c0_5314, %4973 : index
    cf.assert %4974, "invalid: groups must divide weight batch size evenly."
    %c1_i64_5315 = arith.constant 1 : i64
    %c1_i64_5316 = arith.constant 1 : i64
    %c2_i64_5317 = arith.constant 2 : i64
    %c2_i64_5318 = arith.constant 2 : i64
    %cst_5319 = arith.constant 0.000000e+00 : f32
    %c0_5320 = arith.constant 0 : index
    %c1_5321 = arith.constant 1 : index
    %c1_5322 = arith.constant 1 : index
    %c1024_5323 = arith.constant 1024 : index
    %c2_5324 = arith.constant 2 : index
    %c14_5325 = arith.constant 14 : index
    %c3_5326 = arith.constant 3 : index
    %c14_5327 = arith.constant 14 : index
    %c0_i64_5328 = arith.constant 0 : i64
    %4975 = arith.index_cast %c0_i64_5328 : i64 to index
    %4976 = arith.index_cast %c0_i64_5328 : i64 to index
    %4977 = arith.index_cast %4966 : i64 to index
    %4978 = arith.index_cast %4967 : i64 to index
    // Pad the input with 0.0. All four low/high widths evaluate to 0 here, but
    // the result type is fully dynamic (?x?x?x?).
    %padded_5329 = tensor.pad %cast_4956 low[%4975, %4976, %4977, %4978] high[%4975, %4976, %4977, %4978] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5319 : f32
    } : tensor<1x1024x14x14xf32> to tensor<?x?x?x?xf32>
    // Output extent for the first spatial dim:
    //   ((14 + 2*pad - dilation*(kernel - 1) - 1) floordiv stride) + 1
    // with pad = %4966 (0), kernel = %c1_5310 (1), dilation = %c1_i64_5315 (1),
    // stride = %c2_i64_5317 (2), i.e. (13 floordiv 2) + 1 = 7.
    %4979 = arith.index_cast %c1_5310 : index to i64
    %c1_i64_5330 = arith.constant 1 : i64
    %c2_i64_5331 = arith.constant 2 : i64
    %4980 = arith.muli %4966, %c2_i64_5331 : i64
    %4981 = arith.index_cast %c14_5302 : index to i64
    %4982 = arith.addi %4981, %4980 : i64
    %4983 = arith.subi %4979, %c1_i64_5330 : i64
    %4984 = arith.muli %c1_i64_5315, %4983 : i64
    %4985 = arith.subi %4982, %4984 : i64
    %4986 = arith.subi %4985, %c1_i64_5330 : i64
    %4987 = arith.floordivsi %4986, %c2_i64_5317 : i64
    %4988 = arith.addi %4987, %c1_i64_5330 : i64
    %4989 = arith.index_cast %4988 : i64 to index
    // Same computation for the second spatial dim (pad = %4967, kernel =
    // %c1_5312, dilation = %c1_i64_5316, stride = %c2_i64_5318).
    %4990 = arith.index_cast %c1_5312 : index to i64
    %c1_i64_5332 = arith.constant 1 : i64
    %c2_i64_5333 = arith.constant 2 : i64
    %4991 = arith.muli %4967, %c2_i64_5333 : i64
    %4992 = arith.index_cast %c14_5304 : index to i64
    %4993 = arith.addi %4992, %4991 : i64
    %4994 = arith.subi %4990, %c1_i64_5332 : i64
    %4995 = arith.muli %c1_i64_5316, %4994 : i64
    %4996 = arith.subi %4993, %4995 : i64
    %4997 = arith.subi %4996, %c1_i64_5332 : i64
    %4998 = arith.floordivsi %4997, %c2_i64_5318 : i64
    %4999 = arith.addi %4998, %c1_i64_5332 : i64
    %5000 = arith.index_cast %4999 : i64 to index
    // Broadcast the bias along d1 (#map3) into the conv accumulator, so the
    // convolution below accumulates on top of the bias values.
    %5001 = tensor.empty(%4989, %5000) : tensor<1x2048x?x?xf32>
    %5002 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5287 : tensor<2048xf32>) outs(%5001 : tensor<1x2048x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x2048x?x?xf32>
    // %5003 / %5004 (channels floordiv groups) are not referenced within this chunk.
    %5003 = arith.floordivsi %c1024_5300, %4970 : index
    %5004 = arith.floordivsi %c2048_5306, %4970 : index
    %c0_5334 = arith.constant 0 : index
    %c1_5335 = arith.constant 1 : index
    %5005 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%padded_5329, %cast_5276 : tensor<?x?x?x?xf32>, tensor<2048x1024x1x1xf32>) outs(%5002 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
    // Refine the dynamic result type to the static 1x2048x7x7 shape.
    %cast_5336 = tensor.cast %5005 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
    // Quantize %cast_5336 (1x2048x7x7 f32) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 1.5625e-02 (%5006) and zero_point = 0 (%5007).
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5006 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5007 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5337 = torch.constant.int 12
    %5008 = torch.aten.item %5006 : !torch.vtensor<[],f32> -> !torch.float
    %5009 = torch_c.to_f64 %5008
    %5010 = torch.aten.item %5007 : !torch.vtensor<[],si8> -> !torch.int
    %5011 = torch_c.to_i64 %5010
    %c1_5338 = arith.constant 1 : index
    %c1_5339 = arith.constant 1 : index
    %c2048_5340 = arith.constant 2048 : index
    %c2_5341 = arith.constant 2 : index
    %c7_5342 = arith.constant 7 : index
    %c3_5343 = arith.constant 3 : index
    %c7_5344 = arith.constant 7 : index
    %5012 = tensor.empty() : tensor<1x2048x7x7xi8>
    %5013 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5336 : tensor<1x2048x7x7xf32>) outs(%5012 : tensor<1x2048x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5010
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5008
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x2048x7x7xi8>
    %cast_5345 = tensor.cast %5013 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %cast_5346 = tensor.cast %cast_5345 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Dequantize %cast_5346 (1x2048x7x7 i8) to f32:
    //   out = f32(sext(in) - trunc(zero_point)) * scale
    // with scale = 1.5625e-02 (%5014) and zero_point = 0 (%5015).
    %5014 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5015 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5016 = torch.aten.item %5014 : !torch.vtensor<[],f32> -> !torch.float
    %5017 = torch_c.to_f64 %5016
    %5018 = torch.aten.item %5015 : !torch.vtensor<[],si8> -> !torch.int
    %5019 = torch_c.to_i64 %5018
    %cast_5347 = tensor.cast %cast_5346 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %c1_5348 = arith.constant 1 : index
    %c1_5349 = arith.constant 1 : index
    %c2048_5350 = arith.constant 2048 : index
    %c2_5351 = arith.constant 2 : index
    %c7_5352 = arith.constant 7 : index
    %c3_5353 = arith.constant 3 : index
    %c7_5354 = arith.constant 7 : index
    %5020 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5021 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5347 : tensor<1x2048x7x7xi8>) outs(%5020 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5018
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5016
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5355 = tensor.cast %5021 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Element-wise add of two 1x2048x7x7 f32 tensors with a scalar multiplier:
    //   out = %cast_5261 + %cast_5355 * f32(%5022), where %5022 = 1.
    // %cast_5261 is defined above this chunk (presumably the other branch of a
    // residual connection -- confirm).
    %int1_5356 = torch.constant.int 1
    %5022 = torch_c.to_i64 %int1_5356
    %c1_5357 = arith.constant 1 : index
    %c1_5358 = arith.constant 1 : index
    %c2048_5359 = arith.constant 2048 : index
    %c2_5360 = arith.constant 2 : index
    %c7_5361 = arith.constant 7 : index
    %c3_5362 = arith.constant 3 : index
    %c7_5363 = arith.constant 7 : index
    %c1_5364 = arith.constant 1 : index
    %c2048_5365 = arith.constant 2048 : index
    // Broadcast-compatibility asserts; every compare here is between two equal
    // constants (2048 == 2048, 7 == 7, 7 == 7).
    %5023 = arith.cmpi eq, %c2048_5359, %c2048_5365 : index
    cf.assert %5023, "mismatched size for broadcast"
    %c2_5366 = arith.constant 2 : index
    %c7_5367 = arith.constant 7 : index
    %5024 = arith.cmpi eq, %c7_5361, %c7_5367 : index
    cf.assert %5024, "mismatched size for broadcast"
    %c3_5368 = arith.constant 3 : index
    %c7_5369 = arith.constant 7 : index
    %5025 = arith.cmpi eq, %c7_5363, %c7_5369 : index
    cf.assert %5025, "mismatched size for broadcast"
    %5026 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5027 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5261, %cast_5355 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%5026 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      %5774 = arith.sitofp %5022 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5370 = tensor.cast %5027 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Element-wise max(in, 0.0) over %cast_5370 (1x2048x7x7 f32).
    // The compare uses the unordered predicate ugt, so a NaN input compares
    // true and is passed through unchanged.
    %c1_5371 = arith.constant 1 : index
    %c1_5372 = arith.constant 1 : index
    %c2048_5373 = arith.constant 2048 : index
    %c2_5374 = arith.constant 2 : index
    %c7_5375 = arith.constant 7 : index
    %c3_5376 = arith.constant 3 : index
    %c7_5377 = arith.constant 7 : index
    %5028 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5029 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5370 : tensor<1x2048x7x7xf32>) outs(%5028 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5378 = tensor.cast %5029 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Quantize %cast_5378 (1x2048x7x7 f32) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 3.125e-02 (%5030) and zero_point = 0 (%5031).
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5030 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5031 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5379 = torch.constant.int 12
    %5032 = torch.aten.item %5030 : !torch.vtensor<[],f32> -> !torch.float
    %5033 = torch_c.to_f64 %5032
    %5034 = torch.aten.item %5031 : !torch.vtensor<[],si8> -> !torch.int
    %5035 = torch_c.to_i64 %5034
    %c1_5380 = arith.constant 1 : index
    %c1_5381 = arith.constant 1 : index
    %c2048_5382 = arith.constant 2048 : index
    %c2_5383 = arith.constant 2 : index
    %c7_5384 = arith.constant 7 : index
    %c3_5385 = arith.constant 3 : index
    %c7_5386 = arith.constant 7 : index
    %5036 = tensor.empty() : tensor<1x2048x7x7xi8>
    %5037 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5378 : tensor<1x2048x7x7xf32>) outs(%5036 : tensor<1x2048x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5034
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5032
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x2048x7x7xi8>
    %cast_5387 = tensor.cast %5037 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %cast_5388 = tensor.cast %cast_5387 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Dequantize %cast_5388 (1x2048x7x7 i8) to f32:
    //   out = f32(sext(in) - trunc(zero_point)) * scale
    // with scale = 3.125e-02 (%5038) and zero_point = 0 (%5039).
    %5038 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5039 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5040 = torch.aten.item %5038 : !torch.vtensor<[],f32> -> !torch.float
    %5041 = torch_c.to_f64 %5040
    %5042 = torch.aten.item %5039 : !torch.vtensor<[],si8> -> !torch.int
    %5043 = torch_c.to_i64 %5042
    %cast_5389 = tensor.cast %cast_5388 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %c1_5390 = arith.constant 1 : index
    %c1_5391 = arith.constant 1 : index
    %c2048_5392 = arith.constant 2048 : index
    %c2_5393 = arith.constant 2 : index
    %c7_5394 = arith.constant 7 : index
    %c3_5395 = arith.constant 3 : index
    %c7_5396 = arith.constant 7 : index
    %5044 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5045 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5389 : tensor<1x2048x7x7xi8>) outs(%5044 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5042
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5040
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5397 = tensor.cast %5045 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Quantize the f32 tensor %190 (512x2048x1x1) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 4.8828125E-4 (%5046) and zero_point = 0 (%5047).
    // The input map #map4 pins the two trailing (size-1) dims to index 0.
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5046 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
    %5047 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5398 = torch.constant.int 12
    %5048 = torch.aten.item %5046 : !torch.vtensor<[],f32> -> !torch.float
    %5049 = torch_c.to_f64 %5048
    %5050 = torch.aten.item %5047 : !torch.vtensor<[],si8> -> !torch.int
    %5051 = torch_c.to_i64 %5050
    %c1_5399 = arith.constant 1 : index
    %c0_5400 = arith.constant 0 : index
    %c512_5401 = arith.constant 512 : index
    %c1_5402 = arith.constant 1 : index
    %c2048_5403 = arith.constant 2048 : index
    %5052 = tensor.empty() : tensor<512x2048x1x1xi8>
    %5053 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%190 : tensor<512x2048x1x1xf32>) outs(%5052 : tensor<512x2048x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5050
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5048
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x2048x1x1xi8>
    %cast_5404 = tensor.cast %5053 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    %cast_5405 = tensor.cast %cast_5404 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    // Dequantize %cast_5405 (512x2048x1x1 i8) to f32:
    //   out = f32(sext(in) - trunc(zero_point)) * scale
    // with scale = 4.8828125E-4 (%5054) and zero_point = 0 (%5055).
    %5054 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
    %5055 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5056 = torch.aten.item %5054 : !torch.vtensor<[],f32> -> !torch.float
    %5057 = torch_c.to_f64 %5056
    %5058 = torch.aten.item %5055 : !torch.vtensor<[],si8> -> !torch.int
    %5059 = torch_c.to_i64 %5058
    %cast_5406 = tensor.cast %cast_5405 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    %c1_5407 = arith.constant 1 : index
    %c0_5408 = arith.constant 0 : index
    %c512_5409 = arith.constant 512 : index
    %c1_5410 = arith.constant 1 : index
    %c2048_5411 = arith.constant 2048 : index
    %5060 = tensor.empty() : tensor<512x2048x1x1xf32>
    %5061 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5406 : tensor<512x2048x1x1xi8>) outs(%5060 : tensor<512x2048x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5058
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5056
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x2048x1x1xf32>
    %cast_5412 = tensor.cast %5061 : tensor<512x2048x1x1xf32> to tensor<512x2048x1x1xf32>
    // Quantize the f32 tensor %192 (512 elements) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 3.90625e-03 (%5062) and zero_point = 0 (%5063).
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5062 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5063 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5413 = torch.constant.int 12
    %5064 = torch.aten.item %5062 : !torch.vtensor<[],f32> -> !torch.float
    %5065 = torch_c.to_f64 %5064
    %5066 = torch.aten.item %5063 : !torch.vtensor<[],si8> -> !torch.int
    %5067 = torch_c.to_i64 %5066
    %c1_5414 = arith.constant 1 : index
    %c0_5415 = arith.constant 0 : index
    %c512_5416 = arith.constant 512 : index
    %5068 = tensor.empty() : tensor<512xi8>
    %5069 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%192 : tensor<512xf32>) outs(%5068 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5066
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5064
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    %cast_5417 = tensor.cast %5069 : tensor<512xi8> to tensor<512xi8>
    %cast_5418 = tensor.cast %cast_5417 : tensor<512xi8> to tensor<512xi8>
    // Dequantize %cast_5418 (512 i8 elements) to f32:
    //   out = f32(sext(in) - trunc(zero_point)) * scale
    // with scale = 3.90625e-03 (%5070) and zero_point = 0 (%5071).
    %5070 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5071 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5072 = torch.aten.item %5070 : !torch.vtensor<[],f32> -> !torch.float
    %5073 = torch_c.to_f64 %5072
    %5074 = torch.aten.item %5071 : !torch.vtensor<[],si8> -> !torch.int
    %5075 = torch_c.to_i64 %5074
    %cast_5419 = tensor.cast %cast_5418 : tensor<512xi8> to tensor<512xi8>
    %c1_5420 = arith.constant 1 : index
    %c0_5421 = arith.constant 0 : index
    %c512_5422 = arith.constant 512 : index
    %5076 = tensor.empty() : tensor<512xf32>
    %5077 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5419 : tensor<512xi8>) outs(%5076 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5074
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5072
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    %cast_5423 = tensor.cast %5077 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution (NCHW input, FCHW filter), stride 1, dilation 1:
    //   input  : %cast_5397  (1x2048x7x7)
    //   filter : %cast_5412  (512x2048x1x1)
    //   bias   : %cast_5423  (512)
    //   result : %cast_5472  (1x512x7x7)
    // The integer lists built here hold [0,0], [1,1], [1,1] and [0,0]; the conv
    // op below carries dilations = 1 and strides = 1, and the pad widths come
    // from %int0_5424 / %int0_5425.
    %int0_5424 = torch.constant.int 0
    %int0_5425 = torch.constant.int 0
    %int1_5426 = torch.constant.int 1
    %int1_5427 = torch.constant.int 1
    %int1_5428 = torch.constant.int 1
    %int1_5429 = torch.constant.int 1
    %int0_5430 = torch.constant.int 0
    %5078 = torch.prim.ListConstruct %int0_5424, %int0_5425 : (!torch.int, !torch.int) -> !torch.list<int>
    %5079 = torch.prim.ListConstruct %int1_5426, %int1_5427 : (!torch.int, !torch.int) -> !torch.list<int>
    %5080 = torch.prim.ListConstruct %int1_5428, %int1_5429 : (!torch.int, !torch.int) -> !torch.list<int>
    %5081 = torch.prim.ListConstruct %int0_5430, %int0_5430 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5431 = torch.constant.bool false
    %int1_5432 = torch.constant.int 1
    // %5082 (= 1) is the group count checked by the asserts below;
    // %5083 / %5084 (= 0) are the spatial pad widths.
    %5082 = torch_c.to_i64 %int1_5432
    %5083 = torch_c.to_i64 %int0_5424
    %5084 = torch_c.to_i64 %int0_5425
    %5085 = torch_c.to_i64 %int0_5430
    %5086 = torch_c.to_i64 %int0_5430
    %c0_5433 = arith.constant 0 : index
    %c1_5434 = arith.constant 1 : index
    %c1_5435 = arith.constant 1 : index
    %c2048_5436 = arith.constant 2048 : index
    %c2_5437 = arith.constant 2 : index
    %c7_5438 = arith.constant 7 : index
    %c3_5439 = arith.constant 3 : index
    %c7_5440 = arith.constant 7 : index
    %c0_5441 = arith.constant 0 : index
    %c512_5442 = arith.constant 512 : index
    %c1_5443 = arith.constant 1 : index
    %c2048_5444 = arith.constant 2048 : index
    %c2_5445 = arith.constant 2 : index
    %c1_5446 = arith.constant 1 : index
    %c3_5447 = arith.constant 3 : index
    %c1_5448 = arith.constant 1 : index
    // Runtime checks: 2048 rem groups == 0 and 512 rem groups == 0.
    %5087 = arith.index_cast %5082 : i64 to index
    %c0_5449 = arith.constant 0 : index
    %5088 = arith.remsi %c2048_5436, %5087 : index
    %5089 = arith.cmpi eq, %c0_5449, %5088 : index
    cf.assert %5089, "invalid: groups must divide input channel size evenly."
    %c0_5450 = arith.constant 0 : index
    %5090 = arith.remsi %c512_5442, %5087 : index
    %5091 = arith.cmpi eq, %c0_5450, %5090 : index
    cf.assert %5091, "invalid: groups must divide weight batch size evenly."
    %c1_i64_5451 = arith.constant 1 : i64
    %c1_i64_5452 = arith.constant 1 : i64
    %c1_i64_5453 = arith.constant 1 : i64
    %c1_i64_5454 = arith.constant 1 : i64
    %cst_5455 = arith.constant 0.000000e+00 : f32
    %c0_5456 = arith.constant 0 : index
    %c1_5457 = arith.constant 1 : index
    %c1_5458 = arith.constant 1 : index
    %c2048_5459 = arith.constant 2048 : index
    %c2_5460 = arith.constant 2 : index
    %c7_5461 = arith.constant 7 : index
    %c3_5462 = arith.constant 3 : index
    %c7_5463 = arith.constant 7 : index
    %c0_i64_5464 = arith.constant 0 : i64
    %5092 = arith.index_cast %c0_i64_5464 : i64 to index
    %5093 = arith.index_cast %c0_i64_5464 : i64 to index
    %5094 = arith.index_cast %5083 : i64 to index
    %5095 = arith.index_cast %5084 : i64 to index
    // Pad the input with 0.0. All four low/high widths evaluate to 0 here, but
    // the result type is fully dynamic (?x?x?x?).
    %padded_5465 = tensor.pad %cast_5397 low[%5092, %5093, %5094, %5095] high[%5092, %5093, %5094, %5095] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5455 : f32
    } : tensor<1x2048x7x7xf32> to tensor<?x?x?x?xf32>
    // Output extent for the first spatial dim:
    //   ((7 + 2*pad - dilation*(kernel - 1) - 1) floordiv stride) + 1
    // with pad = %5083 (0), kernel = %c1_5446 (1), dilation = %c1_i64_5451 (1),
    // stride = %c1_i64_5453 (1), i.e. (6 floordiv 1) + 1 = 7.
    %5096 = arith.index_cast %c1_5446 : index to i64
    %c1_i64_5466 = arith.constant 1 : i64
    %c2_i64_5467 = arith.constant 2 : i64
    %5097 = arith.muli %5083, %c2_i64_5467 : i64
    %5098 = arith.index_cast %c7_5438 : index to i64
    %5099 = arith.addi %5098, %5097 : i64
    %5100 = arith.subi %5096, %c1_i64_5466 : i64
    %5101 = arith.muli %c1_i64_5451, %5100 : i64
    %5102 = arith.subi %5099, %5101 : i64
    %5103 = arith.subi %5102, %c1_i64_5466 : i64
    %5104 = arith.floordivsi %5103, %c1_i64_5453 : i64
    %5105 = arith.addi %5104, %c1_i64_5466 : i64
    %5106 = arith.index_cast %5105 : i64 to index
    // Same computation for the second spatial dim (pad = %5084, kernel =
    // %c1_5448, dilation = %c1_i64_5452, stride = %c1_i64_5454).
    %5107 = arith.index_cast %c1_5448 : index to i64
    %c1_i64_5468 = arith.constant 1 : i64
    %c2_i64_5469 = arith.constant 2 : i64
    %5108 = arith.muli %5084, %c2_i64_5469 : i64
    %5109 = arith.index_cast %c7_5440 : index to i64
    %5110 = arith.addi %5109, %5108 : i64
    %5111 = arith.subi %5107, %c1_i64_5468 : i64
    %5112 = arith.muli %c1_i64_5452, %5111 : i64
    %5113 = arith.subi %5110, %5112 : i64
    %5114 = arith.subi %5113, %c1_i64_5468 : i64
    %5115 = arith.floordivsi %5114, %c1_i64_5454 : i64
    %5116 = arith.addi %5115, %c1_i64_5468 : i64
    %5117 = arith.index_cast %5116 : i64 to index
    // Broadcast the bias along d1 (#map3) into the conv accumulator, so the
    // convolution below accumulates on top of the bias values.
    %5118 = tensor.empty(%5106, %5117) : tensor<1x512x?x?xf32>
    %5119 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5423 : tensor<512xf32>) outs(%5118 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // %5120 / %5121 (channels floordiv groups) are not referenced within this chunk.
    %5120 = arith.floordivsi %c2048_5436, %5087 : index
    %5121 = arith.floordivsi %c512_5442, %5087 : index
    %c0_5470 = arith.constant 0 : index
    %c1_5471 = arith.constant 1 : index
    %5122 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5465, %cast_5412 : tensor<?x?x?x?xf32>, tensor<512x2048x1x1xf32>) outs(%5119 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Refine the dynamic result type to the static 1x512x7x7 shape.
    %cast_5472 = tensor.cast %5122 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
    // Element-wise max(in, 0.0) over %cast_5472 (1x512x7x7 f32).
    // The compare uses the unordered predicate ugt, so a NaN input compares
    // true and is passed through unchanged.
    %c1_5473 = arith.constant 1 : index
    %c1_5474 = arith.constant 1 : index
    %c512_5475 = arith.constant 512 : index
    %c2_5476 = arith.constant 2 : index
    %c7_5477 = arith.constant 7 : index
    %c3_5478 = arith.constant 3 : index
    %c7_5479 = arith.constant 7 : index
    %5123 = tensor.empty() : tensor<1x512x7x7xf32>
    %5124 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5472 : tensor<1x512x7x7xf32>) outs(%5123 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x7x7xf32>
    %cast_5480 = tensor.cast %5124 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize %cast_5480 (1x512x7x7 f32) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 3.90625e-03 (%5125) and zero_point = 0 (%5126).
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5125 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5126 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5481 = torch.constant.int 12
    %5127 = torch.aten.item %5125 : !torch.vtensor<[],f32> -> !torch.float
    %5128 = torch_c.to_f64 %5127
    %5129 = torch.aten.item %5126 : !torch.vtensor<[],si8> -> !torch.int
    %5130 = torch_c.to_i64 %5129
    %c1_5482 = arith.constant 1 : index
    %c1_5483 = arith.constant 1 : index
    %c512_5484 = arith.constant 512 : index
    %c2_5485 = arith.constant 2 : index
    %c7_5486 = arith.constant 7 : index
    %c3_5487 = arith.constant 3 : index
    %c7_5488 = arith.constant 7 : index
    %5131 = tensor.empty() : tensor<1x512x7x7xi8>
    %5132 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5480 : tensor<1x512x7x7xf32>) outs(%5131 : tensor<1x512x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5129
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5127
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x7x7xi8>
    %cast_5489 = tensor.cast %5132 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %cast_5490 = tensor.cast %cast_5489 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    // Dequantize %cast_5490 (1x512x7x7 i8) to f32:
    //   out = f32(sext(in) - trunc(zero_point)) * scale
    // with scale = 3.90625e-03 (%5133) and zero_point = 0 (%5134).
    %5133 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5134 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5135 = torch.aten.item %5133 : !torch.vtensor<[],f32> -> !torch.float
    %5136 = torch_c.to_f64 %5135
    %5137 = torch.aten.item %5134 : !torch.vtensor<[],si8> -> !torch.int
    %5138 = torch_c.to_i64 %5137
    %cast_5491 = tensor.cast %cast_5490 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %c1_5492 = arith.constant 1 : index
    %c1_5493 = arith.constant 1 : index
    %c512_5494 = arith.constant 512 : index
    %c2_5495 = arith.constant 2 : index
    %c7_5496 = arith.constant 7 : index
    %c3_5497 = arith.constant 3 : index
    %c7_5498 = arith.constant 7 : index
    %5139 = tensor.empty() : tensor<1x512x7x7xf32>
    %5140 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5491 : tensor<1x512x7x7xi8>) outs(%5139 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5137
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5135
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x7x7xf32>
    %cast_5499 = tensor.cast %5140 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize the f32 tensor %194 (512x512x3x3) to i8:
    //   q = fptosi(clamp(round(in / scale) + zero_point, -128.0, 127.0))
    // with scale = 3.90625e-03 (%5141) and zero_point = 0 (%5142).
    // Both indexing maps are the identity (#map1).
    // NOTE(review): math.round rounds halfway cases away from zero; ONNX
    // QuantizeLinear specifies round-half-to-even -- confirm which is intended.
    %5141 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5142 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %int12_5500 = torch.constant.int 12
    %5143 = torch.aten.item %5141 : !torch.vtensor<[],f32> -> !torch.float
    %5144 = torch_c.to_f64 %5143
    %5145 = torch.aten.item %5142 : !torch.vtensor<[],si8> -> !torch.int
    %5146 = torch_c.to_i64 %5145
    %c1_5501 = arith.constant 1 : index
    %c0_5502 = arith.constant 0 : index
    %c512_5503 = arith.constant 512 : index
    %c1_5504 = arith.constant 1 : index
    %c512_5505 = arith.constant 512 : index
    %c2_5506 = arith.constant 2 : index
    %c3_5507 = arith.constant 3 : index
    %c3_5508 = arith.constant 3 : index
    %c3_5509 = arith.constant 3 : index
    %5147 = tensor.empty() : tensor<512x512x3x3xi8>
    %5148 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%194 : tensor<512x512x3x3xf32>) outs(%5147 : tensor<512x512x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5145
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5143
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp to [-128, 127] before the f32 -> i8 conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x512x3x3xi8>
    %cast_5510 = tensor.cast %5148 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    %cast_5511 = tensor.cast %cast_5510 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    // Dequantize the i8 tensor %cast_5511 (512x512x3x3) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    %5149 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5150 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5151 = torch.aten.item %5149 : !torch.vtensor<[],f32> -> !torch.float
    %5152 = torch_c.to_f64 %5151
    %5153 = torch.aten.item %5150 : !torch.vtensor<[],si8> -> !torch.int
    %5154 = torch_c.to_i64 %5153
    %cast_5512 = tensor.cast %cast_5511 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    // Index constants below are not referenced within this view.
    %c1_5513 = arith.constant 1 : index
    %c0_5514 = arith.constant 0 : index
    %c512_5515 = arith.constant 512 : index
    %c1_5516 = arith.constant 1 : index
    %c512_5517 = arith.constant 512 : index
    %c2_5518 = arith.constant 2 : index
    %c3_5519 = arith.constant 3 : index
    %c3_5520 = arith.constant 3 : index
    %c3_5521 = arith.constant 3 : index
    %5155 = tensor.empty() : tensor<512x512x3x3xf32>
    %5156 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5512 : tensor<512x512x3x3xi8>) outs(%5155 : tensor<512x512x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5153
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5151
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x512x3x3xf32>
    // %cast_5522 is the filter operand of the 3x3 convolution further down.
    %cast_5522 = tensor.cast %5156 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
    // Quantize the f32 vector %196 (512 elements) to i8, element-wise:
    //   out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127))
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5157 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5158 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably the torch dtype code (12 = qint8) left over from the quantize op; unused in this view.
    %int12_5523 = torch.constant.int 12
    %5159 = torch.aten.item %5157 : !torch.vtensor<[],f32> -> !torch.float
    %5160 = torch_c.to_f64 %5159
    %5161 = torch.aten.item %5158 : !torch.vtensor<[],si8> -> !torch.int
    %5162 = torch_c.to_i64 %5161
    // Index constants below are not referenced within this view.
    %c1_5524 = arith.constant 1 : index
    %c0_5525 = arith.constant 0 : index
    %c512_5526 = arith.constant 512 : index
    %5163 = tensor.empty() : tensor<512xi8>
    %5164 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%196 : tensor<512xf32>) outs(%5163 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5161
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5159
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. math.round rounds ties away from zero, which does not
      // match the round-half-to-even behaviour of ONNX QuantizeLinear /
      // torch.quantize_per_tensor and is off by one for values at exactly x.5.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Same-type casts (no shape change).
    %cast_5527 = tensor.cast %5164 : tensor<512xi8> to tensor<512xi8>
    %cast_5528 = tensor.cast %cast_5527 : tensor<512xi8> to tensor<512xi8>
    // Dequantize the i8 vector %cast_5528 (512 elements) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5165 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5166 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5167 = torch.aten.item %5165 : !torch.vtensor<[],f32> -> !torch.float
    %5168 = torch_c.to_f64 %5167
    %5169 = torch.aten.item %5166 : !torch.vtensor<[],si8> -> !torch.int
    %5170 = torch_c.to_i64 %5169
    %cast_5529 = tensor.cast %cast_5528 : tensor<512xi8> to tensor<512xi8>
    // Index constants below are not referenced within this view.
    %c1_5530 = arith.constant 1 : index
    %c0_5531 = arith.constant 0 : index
    %c512_5532 = arith.constant 512 : index
    %5171 = tensor.empty() : tensor<512xf32>
    %5172 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5529 : tensor<512xi8>) outs(%5171 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5169
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5167
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // %cast_5533 is broadcast as the bias of the 3x3 convolution that follows.
    %cast_5533 = tensor.cast %5172 : tensor<512xf32> to tensor<512xf32>
    // 3x3 convolution: input %cast_5499 (1x512x7x7), filter %cast_5522 (512x512x3x3),
    // bias %cast_5533 (512). Spatial padding is 1 on each side; result is cast to 1x512x7x7.
    %int1_5534 = torch.constant.int 1
    %int1_5535 = torch.constant.int 1
    %int1_5536 = torch.constant.int 1
    %int1_5537 = torch.constant.int 1
    %int1_5538 = torch.constant.int 1
    %int1_5539 = torch.constant.int 1
    %int0_5540 = torch.constant.int 0
    // The lists hold {1,1}, {1,1}, {1,1} and {0,0}. %5173's elements feed the padding below;
    // the others are presumably dilation/stride/output-padding of the torch conv op — TODO confirm.
    // None of the lists is consumed within this view.
    %5173 = torch.prim.ListConstruct %int1_5534, %int1_5535 : (!torch.int, !torch.int) -> !torch.list<int>
    %5174 = torch.prim.ListConstruct %int1_5536, %int1_5537 : (!torch.int, !torch.int) -> !torch.list<int>
    %5175 = torch.prim.ListConstruct %int1_5538, %int1_5539 : (!torch.int, !torch.int) -> !torch.list<int>
    %5176 = torch.prim.ListConstruct %int0_5540, %int0_5540 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5541 = torch.constant.bool false
    %int1_5542 = torch.constant.int 1
    // %5177 is the value checked as "groups" by the assertions below (1 here).
    %5177 = torch_c.to_i64 %int1_5542
    // %5178 / %5179 are the pad amounts for dims 2 and 3 (both 1).
    %5178 = torch_c.to_i64 %int1_5534
    %5179 = torch_c.to_i64 %int1_5535
    %5180 = torch_c.to_i64 %int0_5540
    %5181 = torch_c.to_i64 %int0_5540
    %c0_5543 = arith.constant 0 : index
    %c1_5544 = arith.constant 1 : index
    %c1_5545 = arith.constant 1 : index
    %c512_5546 = arith.constant 512 : index
    %c2_5547 = arith.constant 2 : index
    %c7_5548 = arith.constant 7 : index
    %c3_5549 = arith.constant 3 : index
    %c7_5550 = arith.constant 7 : index
    %c0_5551 = arith.constant 0 : index
    %c512_5552 = arith.constant 512 : index
    %c1_5553 = arith.constant 1 : index
    %c512_5554 = arith.constant 512 : index
    %c2_5555 = arith.constant 2 : index
    %c3_5556 = arith.constant 3 : index
    %c3_5557 = arith.constant 3 : index
    %c3_5558 = arith.constant 3 : index
    // Runtime checks that 512 is divisible by the group count, once per constant
    // (%c512_5546 and %c512_5552); see the assertion messages.
    %5182 = arith.index_cast %5177 : i64 to index
    %c0_5559 = arith.constant 0 : index
    %5183 = arith.remsi %c512_5546, %5182 : index
    %5184 = arith.cmpi eq, %c0_5559, %5183 : index
    cf.assert %5184, "invalid: groups must divide input channel size evenly."
    %c0_5560 = arith.constant 0 : index
    %5185 = arith.remsi %c512_5552, %5182 : index
    %5186 = arith.cmpi eq, %c0_5560, %5185 : index
    cf.assert %5186, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are used as multiplier/divisor in the output-size arithmetic
    // below; presumably dilation (5561/5562) and stride (5563/5564) — TODO confirm.
    %c1_i64_5561 = arith.constant 1 : i64
    %c1_i64_5562 = arith.constant 1 : i64
    %c1_i64_5563 = arith.constant 1 : i64
    %c1_i64_5564 = arith.constant 1 : i64
    // Padding value.
    %cst_5565 = arith.constant 0.000000e+00 : f32
    %c0_5566 = arith.constant 0 : index
    %c1_5567 = arith.constant 1 : index
    %c1_5568 = arith.constant 1 : index
    %c512_5569 = arith.constant 512 : index
    %c2_5570 = arith.constant 2 : index
    %c7_5571 = arith.constant 7 : index
    %c3_5572 = arith.constant 3 : index
    %c7_5573 = arith.constant 7 : index
    %c0_i64_5574 = arith.constant 0 : i64
    %5187 = arith.index_cast %c0_i64_5574 : i64 to index
    %5188 = arith.index_cast %c0_i64_5574 : i64 to index
    %5189 = arith.index_cast %5178 : i64 to index
    %5190 = arith.index_cast %5179 : i64 to index
    // Zero-pad dims 2 and 3 by %5189 / %5190 on both sides; dims 0 and 1 get no padding.
    // The result type is fully dynamic because the pad amounts are SSA values.
    %padded_5575 = tensor.pad %cast_5499 low[%5187, %5188, %5189, %5190] high[%5187, %5188, %5189, %5190] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5565 : f32
    } : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(7 + 2*%5178 - %c1_i64_5561*(3 - 1) - 1, %c1_i64_5563) + 1   (= 7 here)
    %5191 = arith.index_cast %c3_5556 : index to i64
    %c1_i64_5576 = arith.constant 1 : i64
    %c2_i64_5577 = arith.constant 2 : i64
    %5192 = arith.muli %5178, %c2_i64_5577 : i64
    %5193 = arith.index_cast %c7_5548 : index to i64
    %5194 = arith.addi %5193, %5192 : i64
    %5195 = arith.subi %5191, %c1_i64_5576 : i64
    %5196 = arith.muli %c1_i64_5561, %5195 : i64
    %5197 = arith.subi %5194, %5196 : i64
    %5198 = arith.subi %5197, %c1_i64_5576 : i64
    %5199 = arith.floordivsi %5198, %c1_i64_5563 : i64
    %5200 = arith.addi %5199, %c1_i64_5576 : i64
    %5201 = arith.index_cast %5200 : i64 to index
    // Same computation for dim 3, using %5179, %c1_i64_5562 and %c1_i64_5564.
    %5202 = arith.index_cast %c3_5558 : index to i64
    %c1_i64_5578 = arith.constant 1 : i64
    %c2_i64_5579 = arith.constant 2 : i64
    %5203 = arith.muli %5179, %c2_i64_5579 : i64
    %5204 = arith.index_cast %c7_5550 : index to i64
    %5205 = arith.addi %5204, %5203 : i64
    %5206 = arith.subi %5202, %c1_i64_5578 : i64
    %5207 = arith.muli %c1_i64_5562, %5206 : i64
    %5208 = arith.subi %5205, %5207 : i64
    %5209 = arith.subi %5208, %c1_i64_5578 : i64
    %5210 = arith.floordivsi %5209, %c1_i64_5564 : i64
    %5211 = arith.addi %5210, %c1_i64_5578 : i64
    %5212 = arith.index_cast %5211 : i64 to index
    // Initialise the accumulator by broadcasting the bias along dim 1 (#map3 selects d1).
    %5213 = tensor.empty(%5201, %5212) : tensor<1x512x?x?xf32>
    %5214 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5533 : tensor<512xf32>) outs(%5213 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // %5215, %5216 and the two index constants are not referenced within this view.
    %5215 = arith.floordivsi %c512_5546, %5182 : index
    %5216 = arith.floordivsi %c512_5552, %5182 : index
    %c0_5580 = arith.constant 0 : index
    %c1_5581 = arith.constant 1 : index
    // The convolution accumulates onto the bias-initialised output %5214.
    %5217 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5575, %cast_5522 : tensor<?x?x?x?xf32>, tensor<512x512x3x3xf32>) outs(%5214 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Refine the dynamic spatial dims to the static 7x7 shape.
    %cast_5582 = tensor.cast %5217 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
    // ReLU on %cast_5582: out = (in > 0) ? in : 0, element-wise.
    // Index constants below are not referenced within this view.
    %c1_5583 = arith.constant 1 : index
    %c1_5584 = arith.constant 1 : index
    %c512_5585 = arith.constant 512 : index
    %c2_5586 = arith.constant 2 : index
    %c7_5587 = arith.constant 7 : index
    %c3_5588 = arith.constant 3 : index
    %c7_5589 = arith.constant 7 : index
    %5218 = tensor.empty() : tensor<1x512x7x7xf32>
    %5219 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5582 : tensor<1x512x7x7xf32>) outs(%5218 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered greater-than predicate, so a NaN input also selects %in.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x7x7xf32>
    %cast_5590 = tensor.cast %5219 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize the f32 activation %cast_5590 (1x512x7x7) to i8, element-wise:
    //   out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127))
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5220 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5221 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably the torch dtype code (12 = qint8) left over from the quantize op; unused in this view.
    %int12_5591 = torch.constant.int 12
    %5222 = torch.aten.item %5220 : !torch.vtensor<[],f32> -> !torch.float
    %5223 = torch_c.to_f64 %5222
    %5224 = torch.aten.item %5221 : !torch.vtensor<[],si8> -> !torch.int
    %5225 = torch_c.to_i64 %5224
    // Index constants below are not referenced within this view.
    %c1_5592 = arith.constant 1 : index
    %c1_5593 = arith.constant 1 : index
    %c512_5594 = arith.constant 512 : index
    %c2_5595 = arith.constant 2 : index
    %c7_5596 = arith.constant 7 : index
    %c3_5597 = arith.constant 3 : index
    %c7_5598 = arith.constant 7 : index
    %5226 = tensor.empty() : tensor<1x512x7x7xi8>
    %5227 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5590 : tensor<1x512x7x7xf32>) outs(%5226 : tensor<1x512x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5224
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5222
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. math.round rounds ties away from zero, which does not
      // match the round-half-to-even behaviour of ONNX QuantizeLinear /
      // torch.quantize_per_tensor and is off by one for values at exactly x.5.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x7x7xi8>
    // Same-type casts (no shape change).
    %cast_5599 = tensor.cast %5227 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %cast_5600 = tensor.cast %cast_5599 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    // Dequantize the i8 activation %cast_5600 (1x512x7x7) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5228 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5229 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5230 = torch.aten.item %5228 : !torch.vtensor<[],f32> -> !torch.float
    %5231 = torch_c.to_f64 %5230
    %5232 = torch.aten.item %5229 : !torch.vtensor<[],si8> -> !torch.int
    %5233 = torch_c.to_i64 %5232
    %cast_5601 = tensor.cast %cast_5600 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    // Index constants below are not referenced within this view.
    %c1_5602 = arith.constant 1 : index
    %c1_5603 = arith.constant 1 : index
    %c512_5604 = arith.constant 512 : index
    %c2_5605 = arith.constant 2 : index
    %c7_5606 = arith.constant 7 : index
    %c3_5607 = arith.constant 3 : index
    %c7_5608 = arith.constant 7 : index
    %5234 = tensor.empty() : tensor<1x512x7x7xf32>
    %5235 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5601 : tensor<1x512x7x7xi8>) outs(%5234 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5232
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5230
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x7x7xf32>
    // %cast_5609 is the input of the 1x1 convolution further down.
    %cast_5609 = tensor.cast %5235 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize the f32 tensor %198 (2048x512x1x1) to i8, element-wise:
    //   out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127))
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    %5236 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5237 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably the torch dtype code (12 = qint8) left over from the quantize op; unused in this view.
    %int12_5610 = torch.constant.int 12
    %5238 = torch.aten.item %5236 : !torch.vtensor<[],f32> -> !torch.float
    %5239 = torch_c.to_f64 %5238
    %5240 = torch.aten.item %5237 : !torch.vtensor<[],si8> -> !torch.int
    %5241 = torch_c.to_i64 %5240
    // Index constants below are not referenced within this view.
    %c1_5611 = arith.constant 1 : index
    %c0_5612 = arith.constant 0 : index
    %c2048_5613 = arith.constant 2048 : index
    %c1_5614 = arith.constant 1 : index
    %c512_5615 = arith.constant 512 : index
    %5242 = tensor.empty() : tensor<2048x512x1x1xi8>
    %5243 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%198 : tensor<2048x512x1x1xf32>) outs(%5242 : tensor<2048x512x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5240
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5238
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. math.round rounds ties away from zero, which does not
      // match the round-half-to-even behaviour of ONNX QuantizeLinear /
      // torch.quantize_per_tensor and is off by one for values at exactly x.5.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048x512x1x1xi8>
    // Same-type casts (no shape change).
    %cast_5616 = tensor.cast %5243 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    %cast_5617 = tensor.cast %cast_5616 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    // Dequantize the i8 tensor %cast_5617 (2048x512x1x1) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 3.90625e-03 (1/256) and zero_point = 0.
    %5244 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5245 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5246 = torch.aten.item %5244 : !torch.vtensor<[],f32> -> !torch.float
    %5247 = torch_c.to_f64 %5246
    %5248 = torch.aten.item %5245 : !torch.vtensor<[],si8> -> !torch.int
    %5249 = torch_c.to_i64 %5248
    %cast_5618 = tensor.cast %cast_5617 : tensor<2048x512x1x1xi8> to tensor<2048x512x1x1xi8>
    // Index constants below are not referenced within this view.
    %c1_5619 = arith.constant 1 : index
    %c0_5620 = arith.constant 0 : index
    %c2048_5621 = arith.constant 2048 : index
    %c1_5622 = arith.constant 1 : index
    %c512_5623 = arith.constant 512 : index
    %5250 = tensor.empty() : tensor<2048x512x1x1xf32>
    %5251 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5618 : tensor<2048x512x1x1xi8>) outs(%5250 : tensor<2048x512x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5248
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5246
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048x512x1x1xf32>
    // %cast_5624 is the filter operand of the 1x1 convolution further down.
    %cast_5624 = tensor.cast %5251 : tensor<2048x512x1x1xf32> to tensor<2048x512x1x1xf32>
    // Quantize the f32 vector %200 (2048 elements) to i8, element-wise:
    //   out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127))
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5252 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5253 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably the torch dtype code (12 = qint8) left over from the quantize op; unused in this view.
    %int12_5625 = torch.constant.int 12
    %5254 = torch.aten.item %5252 : !torch.vtensor<[],f32> -> !torch.float
    %5255 = torch_c.to_f64 %5254
    %5256 = torch.aten.item %5253 : !torch.vtensor<[],si8> -> !torch.int
    %5257 = torch_c.to_i64 %5256
    // Index constants below are not referenced within this view.
    %c1_5626 = arith.constant 1 : index
    %c0_5627 = arith.constant 0 : index
    %c2048_5628 = arith.constant 2048 : index
    %5258 = tensor.empty() : tensor<2048xi8>
    %5259 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%200 : tensor<2048xf32>) outs(%5258 : tensor<2048xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5256
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5254
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. math.round rounds ties away from zero, which does not
      // match the round-half-to-even behaviour of ONNX QuantizeLinear /
      // torch.quantize_per_tensor and is off by one for values at exactly x.5.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<2048xi8>
    // Same-type casts (no shape change).
    %cast_5629 = tensor.cast %5259 : tensor<2048xi8> to tensor<2048xi8>
    %cast_5630 = tensor.cast %cast_5629 : tensor<2048xi8> to tensor<2048xi8>
    // Dequantize the i8 vector %cast_5630 (2048 elements) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5260 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5261 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5262 = torch.aten.item %5260 : !torch.vtensor<[],f32> -> !torch.float
    %5263 = torch_c.to_f64 %5262
    %5264 = torch.aten.item %5261 : !torch.vtensor<[],si8> -> !torch.int
    %5265 = torch_c.to_i64 %5264
    %cast_5631 = tensor.cast %cast_5630 : tensor<2048xi8> to tensor<2048xi8>
    // Index constants below are not referenced within this view.
    %c1_5632 = arith.constant 1 : index
    %c0_5633 = arith.constant 0 : index
    %c2048_5634 = arith.constant 2048 : index
    %5266 = tensor.empty() : tensor<2048xf32>
    %5267 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5631 : tensor<2048xi8>) outs(%5266 : tensor<2048xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5264
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5262
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<2048xf32>
    // %cast_5635 is broadcast as the bias of the 1x1 convolution that follows.
    %cast_5635 = tensor.cast %5267 : tensor<2048xf32> to tensor<2048xf32>
    // 1x1 convolution: input %cast_5609 (1x512x7x7), filter %cast_5624 (2048x512x1x1),
    // bias %cast_5635 (2048). Spatial padding is 0; result is cast to 1x2048x7x7.
    %int0_5636 = torch.constant.int 0
    %int0_5637 = torch.constant.int 0
    %int1_5638 = torch.constant.int 1
    %int1_5639 = torch.constant.int 1
    %int1_5640 = torch.constant.int 1
    %int1_5641 = torch.constant.int 1
    %int0_5642 = torch.constant.int 0
    // The lists hold {0,0}, {1,1}, {1,1} and {0,0}. %5268's elements feed the padding below;
    // the others are presumably dilation/stride/output-padding of the torch conv op — TODO confirm.
    // None of the lists is consumed within this view.
    %5268 = torch.prim.ListConstruct %int0_5636, %int0_5637 : (!torch.int, !torch.int) -> !torch.list<int>
    %5269 = torch.prim.ListConstruct %int1_5638, %int1_5639 : (!torch.int, !torch.int) -> !torch.list<int>
    %5270 = torch.prim.ListConstruct %int1_5640, %int1_5641 : (!torch.int, !torch.int) -> !torch.list<int>
    %5271 = torch.prim.ListConstruct %int0_5642, %int0_5642 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5643 = torch.constant.bool false
    %int1_5644 = torch.constant.int 1
    // %5272 is the value checked as "groups" by the assertions below (1 here).
    %5272 = torch_c.to_i64 %int1_5644
    // %5273 / %5274 are the pad amounts for dims 2 and 3 (both 0).
    %5273 = torch_c.to_i64 %int0_5636
    %5274 = torch_c.to_i64 %int0_5637
    %5275 = torch_c.to_i64 %int0_5642
    %5276 = torch_c.to_i64 %int0_5642
    %c0_5645 = arith.constant 0 : index
    %c1_5646 = arith.constant 1 : index
    %c1_5647 = arith.constant 1 : index
    %c512_5648 = arith.constant 512 : index
    %c2_5649 = arith.constant 2 : index
    %c7_5650 = arith.constant 7 : index
    %c3_5651 = arith.constant 3 : index
    %c7_5652 = arith.constant 7 : index
    %c0_5653 = arith.constant 0 : index
    %c2048_5654 = arith.constant 2048 : index
    %c1_5655 = arith.constant 1 : index
    %c512_5656 = arith.constant 512 : index
    %c2_5657 = arith.constant 2 : index
    %c1_5658 = arith.constant 1 : index
    %c3_5659 = arith.constant 3 : index
    %c1_5660 = arith.constant 1 : index
    // Runtime checks that 512 and 2048 are divisible by the group count; see the assertion messages.
    %5277 = arith.index_cast %5272 : i64 to index
    %c0_5661 = arith.constant 0 : index
    %5278 = arith.remsi %c512_5648, %5277 : index
    %5279 = arith.cmpi eq, %c0_5661, %5278 : index
    cf.assert %5279, "invalid: groups must divide input channel size evenly."
    %c0_5662 = arith.constant 0 : index
    %5280 = arith.remsi %c2048_5654, %5277 : index
    %5281 = arith.cmpi eq, %c0_5662, %5280 : index
    cf.assert %5281, "invalid: groups must divide weight batch size evenly."
    // The four i64 ones are used as multiplier/divisor in the output-size arithmetic
    // below; presumably dilation (5663/5664) and stride (5665/5666) — TODO confirm.
    %c1_i64_5663 = arith.constant 1 : i64
    %c1_i64_5664 = arith.constant 1 : i64
    %c1_i64_5665 = arith.constant 1 : i64
    %c1_i64_5666 = arith.constant 1 : i64
    // Padding value.
    %cst_5667 = arith.constant 0.000000e+00 : f32
    %c0_5668 = arith.constant 0 : index
    %c1_5669 = arith.constant 1 : index
    %c1_5670 = arith.constant 1 : index
    %c512_5671 = arith.constant 512 : index
    %c2_5672 = arith.constant 2 : index
    %c7_5673 = arith.constant 7 : index
    %c3_5674 = arith.constant 3 : index
    %c7_5675 = arith.constant 7 : index
    %c0_i64_5676 = arith.constant 0 : i64
    %5282 = arith.index_cast %c0_i64_5676 : i64 to index
    %5283 = arith.index_cast %c0_i64_5676 : i64 to index
    %5284 = arith.index_cast %5273 : i64 to index
    %5285 = arith.index_cast %5274 : i64 to index
    // Pad by %5284 / %5285 on dims 2 and 3 (both 0 here, so no elements are added);
    // the result type is fully dynamic because the pad amounts are SSA values.
    %padded_5677 = tensor.pad %cast_5609 low[%5282, %5283, %5284, %5285] high[%5282, %5283, %5284, %5285] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5667 : f32
    } : tensor<1x512x7x7xf32> to tensor<?x?x?x?xf32>
    // Output extent for dim 2:
    //   floordiv(7 + 2*%5273 - %c1_i64_5663*(1 - 1) - 1, %c1_i64_5665) + 1   (= 7 here)
    %5286 = arith.index_cast %c1_5658 : index to i64
    %c1_i64_5678 = arith.constant 1 : i64
    %c2_i64_5679 = arith.constant 2 : i64
    %5287 = arith.muli %5273, %c2_i64_5679 : i64
    %5288 = arith.index_cast %c7_5650 : index to i64
    %5289 = arith.addi %5288, %5287 : i64
    %5290 = arith.subi %5286, %c1_i64_5678 : i64
    %5291 = arith.muli %c1_i64_5663, %5290 : i64
    %5292 = arith.subi %5289, %5291 : i64
    %5293 = arith.subi %5292, %c1_i64_5678 : i64
    %5294 = arith.floordivsi %5293, %c1_i64_5665 : i64
    %5295 = arith.addi %5294, %c1_i64_5678 : i64
    %5296 = arith.index_cast %5295 : i64 to index
    // Same computation for dim 3, using %5274, %c1_i64_5664 and %c1_i64_5666.
    %5297 = arith.index_cast %c1_5660 : index to i64
    %c1_i64_5680 = arith.constant 1 : i64
    %c2_i64_5681 = arith.constant 2 : i64
    %5298 = arith.muli %5274, %c2_i64_5681 : i64
    %5299 = arith.index_cast %c7_5652 : index to i64
    %5300 = arith.addi %5299, %5298 : i64
    %5301 = arith.subi %5297, %c1_i64_5680 : i64
    %5302 = arith.muli %c1_i64_5664, %5301 : i64
    %5303 = arith.subi %5300, %5302 : i64
    %5304 = arith.subi %5303, %c1_i64_5680 : i64
    %5305 = arith.floordivsi %5304, %c1_i64_5666 : i64
    %5306 = arith.addi %5305, %c1_i64_5680 : i64
    %5307 = arith.index_cast %5306 : i64 to index
    // Initialise the accumulator by broadcasting the bias along dim 1 (#map3 selects d1).
    %5308 = tensor.empty(%5296, %5307) : tensor<1x2048x?x?xf32>
    %5309 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5635 : tensor<2048xf32>) outs(%5308 : tensor<1x2048x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x2048x?x?xf32>
    // %5310, %5311 and the two index constants are not referenced within this view.
    %5310 = arith.floordivsi %c512_5648, %5277 : index
    %5311 = arith.floordivsi %c2048_5654, %5277 : index
    %c0_5682 = arith.constant 0 : index
    %c1_5683 = arith.constant 1 : index
    // The convolution accumulates onto the bias-initialised output %5309.
    %5312 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5677, %cast_5624 : tensor<?x?x?x?xf32>, tensor<2048x512x1x1xf32>) outs(%5309 : tensor<1x2048x?x?xf32>) -> tensor<1x2048x?x?xf32>
    // Refine the dynamic spatial dims to the static 7x7 shape.
    %cast_5684 = tensor.cast %5312 : tensor<1x2048x?x?xf32> to tensor<1x2048x7x7xf32>
    // Quantize the f32 convolution output %cast_5684 (1x2048x7x7) to i8, element-wise:
    //   out = i8(clamp(roundeven(in / scale) + zero_point, -128, 127))
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5313 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5314 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // Presumably the torch dtype code (12 = qint8) left over from the quantize op; unused in this view.
    %int12_5685 = torch.constant.int 12
    %5315 = torch.aten.item %5313 : !torch.vtensor<[],f32> -> !torch.float
    %5316 = torch_c.to_f64 %5315
    %5317 = torch.aten.item %5314 : !torch.vtensor<[],si8> -> !torch.int
    %5318 = torch_c.to_i64 %5317
    // Index constants below are not referenced within this view.
    %c1_5686 = arith.constant 1 : index
    %c1_5687 = arith.constant 1 : index
    %c2048_5688 = arith.constant 2048 : index
    %c2_5689 = arith.constant 2 : index
    %c7_5690 = arith.constant 7 : index
    %c3_5691 = arith.constant 3 : index
    %c7_5692 = arith.constant 7 : index
    %5319 = tensor.empty() : tensor<1x2048x7x7xi8>
    %5320 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5684 : tensor<1x2048x7x7xf32>) outs(%5319 : tensor<1x2048x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      %5774 = torch_c.to_i64 %5317
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5315
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      // Round ties to even. math.round rounds ties away from zero, which does not
      // match the round-half-to-even behaviour of ONNX QuantizeLinear /
      // torch.quantize_per_tensor and is off by one for values at exactly x.5.
      %5779 = math.roundeven %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Saturate to the signed 8-bit range before the float-to-int conversion.
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x2048x7x7xi8>
    // Same-type casts (no shape change).
    %cast_5693 = tensor.cast %5320 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %cast_5694 = tensor.cast %cast_5693 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Dequantize the i8 tensor %cast_5694 (1x2048x7x7) back to f32, element-wise:
    //   out = f32(i32(in) - zero_point) * scale
    // with scale = 1.5625e-02 (1/64) and zero_point = 0.
    %5321 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5322 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5323 = torch.aten.item %5321 : !torch.vtensor<[],f32> -> !torch.float
    %5324 = torch_c.to_f64 %5323
    %5325 = torch.aten.item %5322 : !torch.vtensor<[],si8> -> !torch.int
    %5326 = torch_c.to_i64 %5325
    %cast_5695 = tensor.cast %cast_5694 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Index constants below are not referenced within this view.
    %c1_5696 = arith.constant 1 : index
    %c1_5697 = arith.constant 1 : index
    %c2048_5698 = arith.constant 2048 : index
    %c2_5699 = arith.constant 2 : index
    %c7_5700 = arith.constant 7 : index
    %c3_5701 = arith.constant 3 : index
    %c7_5702 = arith.constant 7 : index
    %5327 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5328 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5695 : tensor<1x2048x7x7xi8>) outs(%5327 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Sign-extend so the zero-point subtraction happens in i32.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5325
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5323
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x2048x7x7xf32>
    // %cast_5703 is the first operand of the element-wise add that follows.
    %cast_5703 = tensor.cast %5328 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Element-wise add: out = %cast_5703 + %cast_5397 * alpha, with alpha = 1 (%int1_5704).
    // %cast_5397 is defined above this view; presumably the residual (skip) branch — TODO confirm.
    %int1_5704 = torch.constant.int 1
    %5329 = torch_c.to_i64 %int1_5704
    %c1_5705 = arith.constant 1 : index
    %c1_5706 = arith.constant 1 : index
    %c2048_5707 = arith.constant 2048 : index
    %c2_5708 = arith.constant 2 : index
    %c7_5709 = arith.constant 7 : index
    %c3_5710 = arith.constant 3 : index
    %c7_5711 = arith.constant 7 : index
    %c1_5712 = arith.constant 1 : index
    %c2048_5713 = arith.constant 2048 : index
    // Broadcast-compatibility checks; each compares two equal index constants
    // (2048 vs 2048, 7 vs 7, 7 vs 7).
    %5330 = arith.cmpi eq, %c2048_5707, %c2048_5713 : index
    cf.assert %5330, "mismatched size for broadcast"
    %c2_5714 = arith.constant 2 : index
    %c7_5715 = arith.constant 7 : index
    %5331 = arith.cmpi eq, %c7_5709, %c7_5715 : index
    cf.assert %5331, "mismatched size for broadcast"
    %c3_5716 = arith.constant 3 : index
    %c7_5717 = arith.constant 7 : index
    %5332 = arith.cmpi eq, %c7_5711, %c7_5717 : index
    cf.assert %5332, "mismatched size for broadcast"
    %5333 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5334 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5703, %cast_5397 : tensor<1x2048x7x7xf32>, tensor<1x2048x7x7xf32>) outs(%5333 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: f32, %in_6197: f32, %out: f32):
      // Scale the second operand by alpha (converted from i64) before adding.
      %5774 = arith.sitofp %5329 : i64 to f32
      %5775 = arith.mulf %in_6197, %5774 : f32
      %5776 = arith.addf %in, %5775 : f32
      linalg.yield %5776 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5718 = tensor.cast %5334 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // ReLU on %cast_5718: out = (in > 0) ? in : 0, element-wise.
    // Index constants below are not referenced within this view.
    %c1_5719 = arith.constant 1 : index
    %c1_5720 = arith.constant 1 : index
    %c2048_5721 = arith.constant 2048 : index
    %c2_5722 = arith.constant 2 : index
    %c7_5723 = arith.constant 7 : index
    %c3_5724 = arith.constant 3 : index
    %c7_5725 = arith.constant 7 : index
    %5335 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5336 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5718 : tensor<1x2048x7x7xf32>) outs(%5335 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: f32, %out: f32):
      %cst_6197 = arith.constant 0.000000e+00 : f32
      // "ugt" is the unordered greater-than predicate, so a NaN input also selects %in.
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x2048x7x7xf32>
    %cast_5726 = tensor.cast %5336 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Quantize the 1x2048x7x7 f32 activation %cast_5726 to i8 with a
    // per-tensor scale of 3.125e-02 (= 2^-5) and zero point 0.
    %5337 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5338 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5727 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5727 = torch.constant.int 12
    // Extract scale / zero point as scalars. %5340 and %5342 are not referenced
    // in the visible lines; the region below re-derives them from %5339 / %5341.
    %5339 = torch.aten.item %5337 : !torch.vtensor<[],f32> -> !torch.float
    %5340 = torch_c.to_f64 %5339
    %5341 = torch.aten.item %5338 : !torch.vtensor<[],si8> -> !torch.int
    %5342 = torch_c.to_i64 %5341
    %c1_5728 = arith.constant 1 : index
    %c1_5729 = arith.constant 1 : index
    %c2048_5730 = arith.constant 2048 : index
    %c2_5731 = arith.constant 2 : index
    %c7_5732 = arith.constant 7 : index
    %c3_5733 = arith.constant 3 : index
    %c7_5734 = arith.constant 7 : index
    %5343 = tensor.empty() : tensor<1x2048x7x7xi8>
    %5344 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5726 : tensor<1x2048x7x7xf32>) outs(%5343 : tensor<1x2048x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5341
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5339
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x2048x7x7xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5735 = tensor.cast %5344 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %cast_5736 = tensor.cast %cast_5735 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    // Dequantize the 1x2048x7x7 i8 tensor %cast_5736 back to f32 using
    // scale 3.125e-02 and zero point 0.
    %5345 = torch.vtensor.literal(dense<3.125000e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5346 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %5348 and %5350 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5347 / %5349.
    %5347 = torch.aten.item %5345 : !torch.vtensor<[],f32> -> !torch.float
    %5348 = torch_c.to_f64 %5347
    %5349 = torch.aten.item %5346 : !torch.vtensor<[],si8> -> !torch.int
    %5350 = torch_c.to_i64 %5349
    %cast_5737 = tensor.cast %cast_5736 : tensor<1x2048x7x7xi8> to tensor<1x2048x7x7xi8>
    %c1_5738 = arith.constant 1 : index
    %c1_5739 = arith.constant 1 : index
    %c2048_5740 = arith.constant 2048 : index
    %c2_5741 = arith.constant 2 : index
    %c7_5742 = arith.constant 7 : index
    %c3_5743 = arith.constant 3 : index
    %c7_5744 = arith.constant 7 : index
    %5351 = tensor.empty() : tensor<1x2048x7x7xf32>
    %5352 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5737 : tensor<1x2048x7x7xi8>) outs(%5351 : tensor<1x2048x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Per element: x = float(sext_i32(%in) - i32(zero_point)) * scale.
      // The subtraction is done in i32, so it cannot wrap for i8 inputs.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5349
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5347
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x2048x7x7xf32>
    // Cast to the identical static type (no shape change).
    %cast_5745 = tensor.cast %5352 : tensor<1x2048x7x7xf32> to tensor<1x2048x7x7xf32>
    // Quantize the 512x2048x1x1 f32 tensor %202 to i8 with a per-tensor scale
    // of 4.8828125e-04 (= 2^-11) and zero point 0.
    // NOTE(review): %202 is defined before this chunk; presumably a convolution
    // weight -- confirm.
    %5353 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
    %5354 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5746 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5746 = torch.constant.int 12
    // %5356 and %5358 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5355 / %5357.
    %5355 = torch.aten.item %5353 : !torch.vtensor<[],f32> -> !torch.float
    %5356 = torch_c.to_f64 %5355
    %5357 = torch.aten.item %5354 : !torch.vtensor<[],si8> -> !torch.int
    %5358 = torch_c.to_i64 %5357
    %c1_5747 = arith.constant 1 : index
    %c0_5748 = arith.constant 0 : index
    %c512_5749 = arith.constant 512 : index
    %c1_5750 = arith.constant 1 : index
    %c2048_5751 = arith.constant 2048 : index
    %5359 = tensor.empty() : tensor<512x2048x1x1xi8>
    // The input is read through #map4, which pins the two trailing size-1
    // dimensions to index 0; the output uses the identity map #map1.
    %5360 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202 : tensor<512x2048x1x1xf32>) outs(%5359 : tensor<512x2048x1x1xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5357
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5355
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x2048x1x1xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5752 = tensor.cast %5360 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    %cast_5753 = tensor.cast %cast_5752 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    // Dequantize the 512x2048x1x1 i8 tensor %cast_5753 back to f32 using
    // scale 4.8828125e-04 and zero point 0.
    %5361 = torch.vtensor.literal(dense<4.8828125E-4> : tensor<f32>) : !torch.vtensor<[],f32>
    %5362 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %5364 and %5366 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5363 / %5365.
    %5363 = torch.aten.item %5361 : !torch.vtensor<[],f32> -> !torch.float
    %5364 = torch_c.to_f64 %5363
    %5365 = torch.aten.item %5362 : !torch.vtensor<[],si8> -> !torch.int
    %5366 = torch_c.to_i64 %5365
    %cast_5754 = tensor.cast %cast_5753 : tensor<512x2048x1x1xi8> to tensor<512x2048x1x1xi8>
    %c1_5755 = arith.constant 1 : index
    %c0_5756 = arith.constant 0 : index
    %c512_5757 = arith.constant 512 : index
    %c1_5758 = arith.constant 1 : index
    %c2048_5759 = arith.constant 2048 : index
    %5367 = tensor.empty() : tensor<512x2048x1x1xf32>
    // The input is read through #map4 (trailing size-1 dims pinned to index 0).
    %5368 = linalg.generic {indexing_maps = [#map4, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5754 : tensor<512x2048x1x1xi8>) outs(%5367 : tensor<512x2048x1x1xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Per element: x = float(sext_i32(%in) - i32(zero_point)) * scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5365
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5363
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x2048x1x1xf32>
    // Cast to the identical static type (no shape change).
    %cast_5760 = tensor.cast %5368 : tensor<512x2048x1x1xf32> to tensor<512x2048x1x1xf32>
    // Quantize the 512-element f32 tensor %204 to i8 with a per-tensor scale of
    // 7.8125e-03 (= 2^-7) and zero point 0.
    // NOTE(review): %204 is defined before this chunk; presumably a convolution
    // bias -- confirm.
    %5369 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5370 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5761 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5761 = torch.constant.int 12
    // %5372 and %5374 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5371 / %5373.
    %5371 = torch.aten.item %5369 : !torch.vtensor<[],f32> -> !torch.float
    %5372 = torch_c.to_f64 %5371
    %5373 = torch.aten.item %5370 : !torch.vtensor<[],si8> -> !torch.int
    %5374 = torch_c.to_i64 %5373
    %c1_5762 = arith.constant 1 : index
    %c0_5763 = arith.constant 0 : index
    %c512_5764 = arith.constant 512 : index
    %5375 = tensor.empty() : tensor<512xi8>
    // 1-D elementwise op: input and output both use the identity map #map2.
    %5376 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%204 : tensor<512xf32>) outs(%5375 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5373
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5371
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5765 = tensor.cast %5376 : tensor<512xi8> to tensor<512xi8>
    %cast_5766 = tensor.cast %cast_5765 : tensor<512xi8> to tensor<512xi8>
    // Dequantize the 512-element i8 tensor %cast_5766 back to f32 using
    // scale 7.8125e-03 and zero point 0.
    %5377 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5378 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %5380 and %5382 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5379 / %5381.
    %5379 = torch.aten.item %5377 : !torch.vtensor<[],f32> -> !torch.float
    %5380 = torch_c.to_f64 %5379
    %5381 = torch.aten.item %5378 : !torch.vtensor<[],si8> -> !torch.int
    %5382 = torch_c.to_i64 %5381
    %cast_5767 = tensor.cast %cast_5766 : tensor<512xi8> to tensor<512xi8>
    %c1_5768 = arith.constant 1 : index
    %c0_5769 = arith.constant 0 : index
    %c512_5770 = arith.constant 512 : index
    %5383 = tensor.empty() : tensor<512xf32>
    %5384 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%cast_5767 : tensor<512xi8>) outs(%5383 : tensor<512xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Per element: x = float(sext_i32(%in) - i32(zero_point)) * scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5381
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5379
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512xf32>
    // Cast to the identical static type (no shape change).
    %cast_5771 = tensor.cast %5384 : tensor<512xf32> to tensor<512xf32>
    // 2-D convolution (NCHW input, FCHW filter):
    //   input  = %cast_5745  (1x2048x7x7 f32)
    //   filter = %cast_5760  (512x2048x1x1 f32)
    //   bias   = %cast_5771  (512 f32), used to initialise the accumulator
    // Result is %cast_5820 (1x512x7x7 f32).
    //
    // Torch-level scalar/list operands. Only %int0_5772, %int0_5773, %int0_5778
    // and %int1_5780 feed the arithmetic below; the lists and %false_5779 are
    // not referenced by the visible lowered ops (presumably left over from the
    // original torch convolution op -- confirm).
    %int0_5772 = torch.constant.int 0
    %int0_5773 = torch.constant.int 0
    %int1_5774 = torch.constant.int 1
    %int1_5775 = torch.constant.int 1
    %int1_5776 = torch.constant.int 1
    %int1_5777 = torch.constant.int 1
    %int0_5778 = torch.constant.int 0
    %5385 = torch.prim.ListConstruct %int0_5772, %int0_5773 : (!torch.int, !torch.int) -> !torch.list<int>
    %5386 = torch.prim.ListConstruct %int1_5774, %int1_5775 : (!torch.int, !torch.int) -> !torch.list<int>
    %5387 = torch.prim.ListConstruct %int1_5776, %int1_5777 : (!torch.int, !torch.int) -> !torch.list<int>
    %5388 = torch.prim.ListConstruct %int0_5778, %int0_5778 : (!torch.int, !torch.int) -> !torch.list<int>
    %false_5779 = torch.constant.bool false
    %int1_5780 = torch.constant.int 1
    // %5389 (= 1) is the value checked as "groups" by the asserts below;
    // %5390 / %5391 (= 0) are the spatial padding amounts used by tensor.pad.
    %5389 = torch_c.to_i64 %int1_5780
    %5390 = torch_c.to_i64 %int0_5772
    %5391 = torch_c.to_i64 %int0_5773
    %5392 = torch_c.to_i64 %int0_5778
    %5393 = torch_c.to_i64 %int0_5778
    %c0_5781 = arith.constant 0 : index
    %c1_5782 = arith.constant 1 : index
    %c1_5783 = arith.constant 1 : index
    %c2048_5784 = arith.constant 2048 : index
    %c2_5785 = arith.constant 2 : index
    %c7_5786 = arith.constant 7 : index
    %c3_5787 = arith.constant 3 : index
    %c7_5788 = arith.constant 7 : index
    %c0_5789 = arith.constant 0 : index
    %c512_5790 = arith.constant 512 : index
    %c1_5791 = arith.constant 1 : index
    %c2048_5792 = arith.constant 2048 : index
    %c2_5793 = arith.constant 2 : index
    %c1_5794 = arith.constant 1 : index
    %c3_5795 = arith.constant 3 : index
    %c1_5796 = arith.constant 1 : index
    // Runtime checks: 2048 % groups == 0 and 512 % groups == 0.
    %5394 = arith.index_cast %5389 : i64 to index
    %c0_5797 = arith.constant 0 : index
    %5395 = arith.remsi %c2048_5784, %5394 : index
    %5396 = arith.cmpi eq, %c0_5797, %5395 : index
    cf.assert %5396, "invalid: groups must divide input channel size evenly."
    %c0_5798 = arith.constant 0 : index
    %5397 = arith.remsi %c512_5790, %5394 : index
    %5398 = arith.cmpi eq, %c0_5798, %5397 : index
    cf.assert %5398, "invalid: groups must divide weight batch size evenly."
    %c1_i64_5799 = arith.constant 1 : i64
    %c1_i64_5800 = arith.constant 1 : i64
    %c1_i64_5801 = arith.constant 1 : i64
    %c1_i64_5802 = arith.constant 1 : i64
    %cst_5803 = arith.constant 0.000000e+00 : f32
    %c0_5804 = arith.constant 0 : index
    %c1_5805 = arith.constant 1 : index
    %c1_5806 = arith.constant 1 : index
    %c2048_5807 = arith.constant 2048 : index
    %c2_5808 = arith.constant 2 : index
    %c7_5809 = arith.constant 7 : index
    %c3_5810 = arith.constant 3 : index
    %c7_5811 = arith.constant 7 : index
    %c0_i64_5812 = arith.constant 0 : i64
    // Zero-fill padding of the input: low/high pad is [0, 0, %5390, %5391] on
    // (N, C, H, W). All four amounts are 0 here, but the result type becomes
    // fully dynamic (?x?x?x?).
    %5399 = arith.index_cast %c0_i64_5812 : i64 to index
    %5400 = arith.index_cast %c0_i64_5812 : i64 to index
    %5401 = arith.index_cast %5390 : i64 to index
    %5402 = arith.index_cast %5391 : i64 to index
    %padded_5813 = tensor.pad %cast_5745 low[%5399, %5400, %5401, %5402] high[%5399, %5400, %5401, %5402] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_5803 : f32
    } : tensor<1x2048x7x7xf32> to tensor<?x?x?x?xf32>
    // Output height:
    //   floordiv(7 + 2*%5390 - %c1_i64_5799*(1 - 1) - 1, %c1_i64_5801) + 1
    // i.e. floordiv(in + 2*pad - d*(k - 1) - 1, s) + 1 with in = 7, k = 1 and
    // d = s = 1 (consistent with the dilations/strides attributes on the conv
    // below), which evaluates to 7.
    %5403 = arith.index_cast %c1_5794 : index to i64
    %c1_i64_5814 = arith.constant 1 : i64
    %c2_i64_5815 = arith.constant 2 : i64
    %5404 = arith.muli %5390, %c2_i64_5815 : i64
    %5405 = arith.index_cast %c7_5786 : index to i64
    %5406 = arith.addi %5405, %5404 : i64
    %5407 = arith.subi %5403, %c1_i64_5814 : i64
    %5408 = arith.muli %c1_i64_5799, %5407 : i64
    %5409 = arith.subi %5406, %5408 : i64
    %5410 = arith.subi %5409, %c1_i64_5814 : i64
    %5411 = arith.floordivsi %5410, %c1_i64_5801 : i64
    %5412 = arith.addi %5411, %c1_i64_5814 : i64
    %5413 = arith.index_cast %5412 : i64 to index
    // Output width: same formula using %5391, %c7_5788, %c1_5796,
    // %c1_i64_5800 and %c1_i64_5802; also evaluates to 7.
    %5414 = arith.index_cast %c1_5796 : index to i64
    %c1_i64_5816 = arith.constant 1 : i64
    %c2_i64_5817 = arith.constant 2 : i64
    %5415 = arith.muli %5391, %c2_i64_5817 : i64
    %5416 = arith.index_cast %c7_5788 : index to i64
    %5417 = arith.addi %5416, %5415 : i64
    %5418 = arith.subi %5414, %c1_i64_5816 : i64
    %5419 = arith.muli %c1_i64_5800, %5418 : i64
    %5420 = arith.subi %5417, %5419 : i64
    %5421 = arith.subi %5420, %c1_i64_5816 : i64
    %5422 = arith.floordivsi %5421, %c1_i64_5802 : i64
    %5423 = arith.addi %5422, %c1_i64_5816 : i64
    %5424 = arith.index_cast %5423 : i64 to index
    // Accumulator initialisation: broadcast the 512-element bias along the
    // channel dimension (#map3 selects d1) into a 1x512x?x? tensor.
    %5425 = tensor.empty(%5413, %5424) : tensor<1x512x?x?xf32>
    %5426 = linalg.generic {indexing_maps = [#map3, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5771 : tensor<512xf32>) outs(%5425 : tensor<1x512x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x512x?x?xf32>
    // %5427 / %5428 (channels divided by groups) are not referenced in the
    // visible lines.
    %5427 = arith.floordivsi %c2048_5784, %5394 : index
    %5428 = arith.floordivsi %c512_5790, %5394 : index
    %c0_5818 = arith.constant 0 : index
    %c1_5819 = arith.constant 1 : index
    // The convolution takes the bias-initialised tensor %5426 as its output
    // operand, so the yielded result already includes the bias.
    %5429 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5813, %cast_5760 : tensor<?x?x?x?xf32>, tensor<512x2048x1x1xf32>) outs(%5426 : tensor<1x512x?x?xf32>) -> tensor<1x512x?x?xf32>
    // Refine the dynamic spatial dims back to the static 7x7 shape.
    %cast_5820 = tensor.cast %5429 : tensor<1x512x?x?xf32> to tensor<1x512x7x7xf32>
    // ReLU over the 1x512x7x7 f32 convolution result %cast_5820.
    // The index constants below are not referenced in the visible lines.
    %c1_5821 = arith.constant 1 : index
    %c1_5822 = arith.constant 1 : index
    %c512_5823 = arith.constant 512 : index
    %c2_5824 = arith.constant 2 : index
    %c7_5825 = arith.constant 7 : index
    %c3_5826 = arith.constant 3 : index
    %c7_5827 = arith.constant 7 : index
    %5430 = tensor.empty() : tensor<1x512x7x7xf32>
    %5431 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5820 : tensor<1x512x7x7xf32>) outs(%5430 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: f32, %out: f32):
      // Yields %in when %in > 0, otherwise 0.0. `ugt` is the unordered
      // predicate, so a NaN input also selects %in (NaN passes through).
      %cst_6197 = arith.constant 0.000000e+00 : f32
      %5774 = arith.cmpf ugt, %in, %cst_6197 : f32
      %5775 = arith.select %5774, %in, %cst_6197 : f32
      linalg.yield %5775 : f32
    } -> tensor<1x512x7x7xf32>
    // Cast to the identical static type (no shape change).
    %cast_5828 = tensor.cast %5431 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize the 1x512x7x7 f32 activation %cast_5828 to i8 with a per-tensor
    // scale of 3.90625e-03 (= 2^-8) and zero point 0.
    %5432 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5433 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5829 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5829 = torch.constant.int 12
    // %5435 and %5437 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5434 / %5436.
    %5434 = torch.aten.item %5432 : !torch.vtensor<[],f32> -> !torch.float
    %5435 = torch_c.to_f64 %5434
    %5436 = torch.aten.item %5433 : !torch.vtensor<[],si8> -> !torch.int
    %5437 = torch_c.to_i64 %5436
    %c1_5830 = arith.constant 1 : index
    %c1_5831 = arith.constant 1 : index
    %c512_5832 = arith.constant 512 : index
    %c2_5833 = arith.constant 2 : index
    %c7_5834 = arith.constant 7 : index
    %c3_5835 = arith.constant 3 : index
    %c7_5836 = arith.constant 7 : index
    %5438 = tensor.empty() : tensor<1x512x7x7xi8>
    %5439 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5828 : tensor<1x512x7x7xf32>) outs(%5438 : tensor<1x512x7x7xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5436
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5434
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<1x512x7x7xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5837 = tensor.cast %5439 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %cast_5838 = tensor.cast %cast_5837 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    // Dequantize the 1x512x7x7 i8 tensor %cast_5838 back to f32 using
    // scale 3.90625e-03 and zero point 0.
    %5440 = torch.vtensor.literal(dense<3.906250e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5441 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %5443 and %5445 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5442 / %5444.
    %5442 = torch.aten.item %5440 : !torch.vtensor<[],f32> -> !torch.float
    %5443 = torch_c.to_f64 %5442
    %5444 = torch.aten.item %5441 : !torch.vtensor<[],si8> -> !torch.int
    %5445 = torch_c.to_i64 %5444
    %cast_5839 = tensor.cast %cast_5838 : tensor<1x512x7x7xi8> to tensor<1x512x7x7xi8>
    %c1_5840 = arith.constant 1 : index
    %c1_5841 = arith.constant 1 : index
    %c512_5842 = arith.constant 512 : index
    %c2_5843 = arith.constant 2 : index
    %c7_5844 = arith.constant 7 : index
    %c3_5845 = arith.constant 3 : index
    %c7_5846 = arith.constant 7 : index
    %5446 = tensor.empty() : tensor<1x512x7x7xf32>
    %5447 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5839 : tensor<1x512x7x7xi8>) outs(%5446 : tensor<1x512x7x7xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Per element: x = float(sext_i32(%in) - i32(zero_point)) * scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5444
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5442
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<1x512x7x7xf32>
    // Cast to the identical static type (no shape change).
    %cast_5847 = tensor.cast %5447 : tensor<1x512x7x7xf32> to tensor<1x512x7x7xf32>
    // Quantize the 512x512x3x3 f32 tensor %206 to i8 with a per-tensor scale of
    // 7.8125e-03 (= 2^-7) and zero point 0.
    // NOTE(review): %206 is defined before this chunk; presumably a convolution
    // weight -- confirm.
    %5448 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5449 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5848 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5848 = torch.constant.int 12
    // %5451 and %5453 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5450 / %5452.
    %5450 = torch.aten.item %5448 : !torch.vtensor<[],f32> -> !torch.float
    %5451 = torch_c.to_f64 %5450
    %5452 = torch.aten.item %5449 : !torch.vtensor<[],si8> -> !torch.int
    %5453 = torch_c.to_i64 %5452
    %c1_5849 = arith.constant 1 : index
    %c0_5850 = arith.constant 0 : index
    %c512_5851 = arith.constant 512 : index
    %c1_5852 = arith.constant 1 : index
    %c512_5853 = arith.constant 512 : index
    %c2_5854 = arith.constant 2 : index
    %c3_5855 = arith.constant 3 : index
    %c3_5856 = arith.constant 3 : index
    %c3_5857 = arith.constant 3 : index
    %5454 = tensor.empty() : tensor<512x512x3x3xi8>
    // Input and output both use the identity map #map1 (no size-1 dims here).
    %5455 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206 : tensor<512x512x3x3xf32>) outs(%5454 : tensor<512x512x3x3xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5452
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5450
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512x512x3x3xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5858 = tensor.cast %5455 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    %cast_5859 = tensor.cast %cast_5858 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    // Dequantize the 512x512x3x3 i8 tensor %cast_5859 back to f32 using
    // scale 7.8125e-03 and zero point 0.
    %5456 = torch.vtensor.literal(dense<7.812500e-03> : tensor<f32>) : !torch.vtensor<[],f32>
    %5457 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // %5459 and %5461 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5458 / %5460.
    %5458 = torch.aten.item %5456 : !torch.vtensor<[],f32> -> !torch.float
    %5459 = torch_c.to_f64 %5458
    %5460 = torch.aten.item %5457 : !torch.vtensor<[],si8> -> !torch.int
    %5461 = torch_c.to_i64 %5460
    %cast_5860 = tensor.cast %cast_5859 : tensor<512x512x3x3xi8> to tensor<512x512x3x3xi8>
    %c1_5861 = arith.constant 1 : index
    %c0_5862 = arith.constant 0 : index
    %c512_5863 = arith.constant 512 : index
    %c1_5864 = arith.constant 1 : index
    %c512_5865 = arith.constant 512 : index
    %c2_5866 = arith.constant 2 : index
    %c3_5867 = arith.constant 3 : index
    %c3_5868 = arith.constant 3 : index
    %c3_5869 = arith.constant 3 : index
    %5462 = tensor.empty() : tensor<512x512x3x3xf32>
    %5463 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cast_5860 : tensor<512x512x3x3xi8>) outs(%5462 : tensor<512x512x3x3xf32>) {
    ^bb0(%in: i8, %out: f32):
      // Per element: x = float(sext_i32(%in) - i32(zero_point)) * scale.
      %5774 = arith.extsi %in : i8 to i32
      %5775 = torch_c.to_i64 %5460
      %5776 = arith.trunci %5775 : i64 to i32
      %5777 = arith.subi %5774, %5776 : i32
      %5778 = arith.sitofp %5777 : i32 to f32
      %5779 = torch_c.to_f64 %5458
      %5780 = arith.truncf %5779 : f64 to f32
      %5781 = arith.mulf %5778, %5780 : f32
      linalg.yield %5781 : f32
    } -> tensor<512x512x3x3xf32>
    // Cast to the identical static type (no shape change).
    %cast_5870 = tensor.cast %5463 : tensor<512x512x3x3xf32> to tensor<512x512x3x3xf32>
    // Quantize the 512-element f32 tensor %208 to i8 with a per-tensor scale of
    // 1.5625e-02 (= 2^-6) and zero point 0.
    // NOTE(review): %208 is defined before this chunk; presumably a convolution
    // bias -- confirm.
    %5464 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5465 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    // NOTE(review): %int12_5871 is not used in the visible lines; presumably the
    // torch dtype code of the quantized type -- confirm.
    %int12_5871 = torch.constant.int 12
    // %5467 and %5469 are not referenced in the visible lines; the region below
    // re-derives the scalars from %5466 / %5468.
    %5466 = torch.aten.item %5464 : !torch.vtensor<[],f32> -> !torch.float
    %5467 = torch_c.to_f64 %5466
    %5468 = torch.aten.item %5465 : !torch.vtensor<[],si8> -> !torch.int
    %5469 = torch_c.to_i64 %5468
    %c1_5872 = arith.constant 1 : index
    %c0_5873 = arith.constant 0 : index
    %c512_5874 = arith.constant 512 : index
    %5470 = tensor.empty() : tensor<512xi8>
    // 1-D elementwise op: input and output both use the identity map #map2.
    %5471 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel"]} ins(%208 : tensor<512xf32>) outs(%5470 : tensor<512xi8>) {
    ^bb0(%in: f32, %out: i8):
      // Per element: q = round(%in / scale) + zero_point, computed in f32.
      %5774 = torch_c.to_i64 %5468
      %5775 = arith.sitofp %5774 : i64 to f32
      %5776 = torch_c.to_f64 %5466
      %5777 = arith.truncf %5776 : f64 to f32
      %5778 = arith.divf %in, %5777 : f32
      %5779 = math.round %5778 : f32
      %5780 = arith.addf %5779, %5775 : f32
      // Clamp q to [-128, 127] before the float -> i8 conversion. Both compares
      // are unordered predicates, so a NaN q takes the upper bound (127).
      %cst_6197 = arith.constant -1.280000e+02 : f32
      %cst_6198 = arith.constant 1.270000e+02 : f32
      %5781 = arith.cmpf ult, %5780, %cst_6197 : f32
      %5782 = arith.cmpf ugt, %5780, %cst_6198 : f32
      %5783 = arith.select %5781, %cst_6197, %5780 : f32
      %5784 = arith.select %5782, %cst_6198, %5783 : f32
      %5785 = arith.fptosi %5784 : f32 to i8
      linalg.yield %5785 : i8
    } -> tensor<512xi8>
    // Two consecutive casts to the identical static type (no shape change).
    %cast_5875 = tensor.cast %5471 : tensor<512xi8> to tensor<512xi8>
    %cast_5876 = tensor.cast %cast_5875 : tensor<512xi8> to tensor<512xi8>
    %5472 = torch.vtensor.literal(dense<1.562500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %5473 = torch.vtensor.literal(dense<0> : tensor<si8>) : !torch.vtensor<[],si8>
    %5474 = torch.aten.item %5472 : !torch.vtensor<[],f32> -> !torch.float
    %5475 = torch_c.to_
No results found