AmosLewis · May 1, 2024 16:30
diff --git a/gpt2-xl.default.pytorch.torch.elide.mlir b/gpt2-xl.default.pytorch.torch.elide.mlir
 module {
  func.func @main_graph(%arg0: !torch.vtensor<[1,7],si64>) -> (!torch.vtensor<[1,7,50257],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, 
!torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>) attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.4.0"} {
    %0 = torch.vtensor.literal(dense_resource<__elided__> : tensor<50257x1600xf32>) : !torch.vtensor<[50257,1600],f32>
    %1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x1600xf32>) : !torch.vtensor<[1024,1600],f32>
    %2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %4 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x4800xf32>) : !torch.vtensor<[1600,4800],f32>
    %569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4800xf32>) : !torch.vtensor<[4800],f32>
    %570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x1600xf32>) : !torch.vtensor<[1600,1600],f32>
    %571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x6400xf32>) : !torch.vtensor<[1600,6400],f32>
    %575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400xf32>) : !torch.vtensor<[6400],f32>
    %576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<6400x1600xf32>) : !torch.vtensor<[6400,1600],f32>
    %577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600xf32>) : !torch.vtensor<[1600],f32>
    %580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1600x50257xf32>) : !torch.vtensor<[1600,50257],f32>
    %none = torch.constant.none
    %581 = torch.vtensor.literal(dense<[-1, 7]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int1 = torch.constant.int 1
    %int7 = torch.constant.int 7
    %582 = torch.prim.ListConstruct %int1, %int7 : (!torch.int, !torch.int) -> !torch.list<int>
    %583 = torch.aten.reshape %arg0, %582 : !torch.vtensor<[1,7],si64>, !torch.list<int> -> !torch.vtensor<[1,7],si64>
    %584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x7xsi64>) : !torch.vtensor<[1,7],si64>
    %int0 = torch.constant.int 0
    %int0_0 = torch.constant.int 0
    %int1_1 = torch.constant.int 1
    %585 = torch.aten.lt.Scalar %583, %int0_0 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.vtensor<[1,7],i1>
    %586 = torch.aten.size.int %0, %int0 : !torch.vtensor<[50257,1600],f32>, !torch.int -> !torch.int
    %587 = torch.aten.add.Scalar %583, %586, %int1_1 : !torch.vtensor<[1,7],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,7],si64>
    %588 = torch.aten.where.self %585, %587, %583 : !torch.vtensor<[1,7],i1>, !torch.vtensor<[1,7],si64>, !torch.vtensor<[1,7],si64> -> !torch.vtensor<[1,7],si64>
    %int0_2 = torch.constant.int 0
    %589 = torch.aten.size.int %588, %int0_2 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.int
    %int1_3 = torch.constant.int 1
    %590 = torch.aten.size.int %588, %int1_3 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.int
    %591 = torch.prim.ListConstruct %589, %590 : (!torch.int, !torch.int) -> !torch.list<int>
    %592 = torch.aten.dim %588 : !torch.vtensor<[1,7],si64> -> !torch.int
    %593 = torch.aten.sub.int %592, %int1_1 : !torch.int, !torch.int -> !torch.int
    %594 = torch.aten.flatten.using_ints %588, %int0_0, %593 : !torch.vtensor<[1,7],si64>, !torch.int, !torch.int -> !torch.vtensor<[7],si64>
    %595 = torch.aten.index_select %0, %int0, %594 : !torch.vtensor<[50257,1600],f32>, !torch.int, !torch.vtensor<[7],si64> -> !torch.vtensor<[7,1600],f32>
    %596 = torch.aten.unflatten.int %595, %int0, %591 : !torch.vtensor<[7,1600],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int0_4 = torch.constant.int 0
    %int0_5 = torch.constant.int 0
    %int1_6 = torch.constant.int 1
    %597 = torch.aten.lt.Scalar %584, %int0_5 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.vtensor<[1,7],i1>
    %598 = torch.aten.size.int %1, %int0_4 : !torch.vtensor<[1024,1600],f32>, !torch.int -> !torch.int
    %599 = torch.aten.add.Scalar %584, %598, %int1_6 : !torch.vtensor<[1,7],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,7],si64>
    %600 = torch.aten.where.self %597, %599, %584 : !torch.vtensor<[1,7],i1>, !torch.vtensor<[1,7],si64>, !torch.vtensor<[1,7],si64> -> !torch.vtensor<[1,7],si64>
    %int0_7 = torch.constant.int 0
    %601 = torch.aten.size.int %600, %int0_7 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.int
    %int1_8 = torch.constant.int 1
    %602 = torch.aten.size.int %600, %int1_8 : !torch.vtensor<[1,7],si64>, !torch.int -> !torch.int
    %603 = torch.prim.ListConstruct %601, %602 : (!torch.int, !torch.int) -> !torch.list<int>
    %604 = torch.aten.dim %600 : !torch.vtensor<[1,7],si64> -> !torch.int
    %605 = torch.aten.sub.int %604, %int1_6 : !torch.int, !torch.int -> !torch.int
    %606 = torch.aten.flatten.using_ints %600, %int0_5, %605 : !torch.vtensor<[1,7],si64>, !torch.int, !torch.int -> !torch.vtensor<[7],si64>
    %607 = torch.aten.index_select %1, %int0_4, %606 : !torch.vtensor<[1024,1600],f32>, !torch.int, !torch.vtensor<[7],si64> -> !torch.vtensor<[7,1600],f32>
    %608 = torch.aten.unflatten.int %607, %int0_4, %603 : !torch.vtensor<[7,1600],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_9 = torch.constant.int 1
    %609 = torch.aten.add.Tensor %596, %608, %int1_9 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06 = torch.constant.float 9.9999997473787516E-6
    %int1600 = torch.constant.int 1600
    %610 = torch.prim.ListConstruct %int1600 : (!torch.int) -> !torch.list<int>
    %result0, %result1, %result2 = torch.aten.native_layer_norm %609, %610, %2, %3, %float9.999990e-06 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %611 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_10 = torch.constant.int 7
    %int1600_11 = torch.constant.int 1600
    %612 = torch.prim.ListConstruct %int7_10, %int1600_11 : (!torch.int, !torch.int) -> !torch.list<int>
    %613 = torch.aten.reshape %result0, %612 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_12 = torch.constant.int 0
    %int1_13 = torch.constant.int 1
    %614 = torch.aten.mm %613, %4 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %615 = torch.aten.add.Tensor %614, %5, %int1_13 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %616 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_14 = torch.constant.int 1
    %int7_15 = torch.constant.int 7
    %int4800 = torch.constant.int 4800
    %617 = torch.prim.ListConstruct %int1_14, %int7_15, %int4800 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %618 = torch.aten.reshape %615, %617 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %619 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %620 = torch.prim.tolist(%619) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2 = torch.constant.int 2
    %621 = torch.aten.split_with_sizes %618, %620, %int2 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %622:3 = torch.prim.ListUnpack %621 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %623 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_16 = torch.constant.int 1
    %int7_17 = torch.constant.int 7
    %int25 = torch.constant.int 25
    %int64 = torch.constant.int 64
    %624 = torch.prim.ListConstruct %int1_16, %int7_17, %int25, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %625 = torch.aten.reshape %622#0, %624 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_18 = torch.constant.int 1
    %int2_19 = torch.constant.int 2
    %626 = torch.aten.transpose.int %625, %int1_18, %int2_19 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %627 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_20 = torch.constant.int 1
    %int7_21 = torch.constant.int 7
    %int25_22 = torch.constant.int 25
    %int64_23 = torch.constant.int 64
    %628 = torch.prim.ListConstruct %int1_20, %int7_21, %int25_22, %int64_23 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %629 = torch.aten.reshape %622#1, %628 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_24 = torch.constant.int 1
    %int2_25 = torch.constant.int 2
    %630 = torch.aten.transpose.int %629, %int1_24, %int2_25 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %631 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_26 = torch.constant.int 1
    %int7_27 = torch.constant.int 7
    %int25_28 = torch.constant.int 25
    %int64_29 = torch.constant.int 64
    %632 = torch.prim.ListConstruct %int1_26, %int7_27, %int25_28, %int64_29 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %633 = torch.aten.reshape %622#2, %632 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_30 = torch.constant.int 1
    %int2_31 = torch.constant.int 2
    %634 = torch.aten.transpose.int %633, %int1_30, %int2_31 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_32 = torch.constant.int 1
    %int2_33 = torch.constant.int 2
    %635 = torch.aten.transpose.int %629, %int1_32, %int2_33 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_34 = torch.constant.int 2
    %int3 = torch.constant.int 3
    %636 = torch.aten.transpose.int %635, %int2_34, %int3 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %637 = torch.aten.matmul %626, %636 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %638 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %639 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %640 = torch.aten.pow.Tensor_Tensor %638, %639 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %641 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_35 = torch.constant.int 0
    %642 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_36 = torch.constant.none
    %float0.000000e00 = torch.constant.float 0.000000e+00
    %643 = torch.aten.full %642, %float0.000000e00, %none_36, %none_36, %none_36, %none_36 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_37 = torch.constant.int 1
    %644 = torch.aten.add.Tensor %643, %640, %int1_37 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %645 = torch.aten.div.Tensor %637, %644 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6 = torch.constant.int 6
    %none_38 = torch.constant.none
    %false = torch.constant.bool false
    %647 = torch.aten.to.dtype %645, %int6, %false, %false, %none_38 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %648 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %649 = torch.aten.where.self %646, %647, %648 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_39 = torch.constant.int 3
    %none_40 = torch.constant.none
    %650 = torch.aten.softmax.int %649, %int3_39, %none_40 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_41 = torch.constant.int 6
    %none_42 = torch.constant.none
    %false_43 = torch.constant.bool false
    %651 = torch.aten.to.dtype %650, %int6_41, %false_43, %false_43, %none_42 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %652 = torch.aten.matmul %651, %634 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_44 = torch.constant.int 1
    %int2_45 = torch.constant.int 2
    %653 = torch.aten.transpose.int %652, %int1_44, %int2_45 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %654 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_46 = torch.constant.int 1
    %int7_47 = torch.constant.int 7
    %int1600_48 = torch.constant.int 1600
    %655 = torch.prim.ListConstruct %int1_46, %int7_47, %int1600_48 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %656 = torch.aten.reshape %653, %655 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %657 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_49 = torch.constant.int 7
    %int1600_50 = torch.constant.int 1600
    %658 = torch.prim.ListConstruct %int7_49, %int1600_50 : (!torch.int, !torch.int) -> !torch.list<int>
    %659 = torch.aten.reshape %656, %658 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_51 = torch.constant.int 0
    %int1_52 = torch.constant.int 1
    %660 = torch.aten.mm %659, %6 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %661 = torch.aten.add.Tensor %660, %7, %int1_52 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %662 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_53 = torch.constant.int 1
    %int7_54 = torch.constant.int 7
    %int1600_55 = torch.constant.int 1600
    %663 = torch.prim.ListConstruct %int1_53, %int7_54, %int1600_55 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %664 = torch.aten.reshape %661, %663 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_56 = torch.constant.int 1
    %665 = torch.aten.add.Tensor %664, %609, %int1_56 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_57 = torch.constant.float 9.9999997473787516E-6
    %int1600_58 = torch.constant.int 1600
    %666 = torch.prim.ListConstruct %int1600_58 : (!torch.int) -> !torch.list<int>
    %result0_59, %result1_60, %result2_61 = torch.aten.native_layer_norm %665, %666, %8, %9, %float9.999990e-06_57 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %667 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_62 = torch.constant.int 7
    %int1600_63 = torch.constant.int 1600
    %668 = torch.prim.ListConstruct %int7_62, %int1600_63 : (!torch.int, !torch.int) -> !torch.list<int>
    %669 = torch.aten.reshape %result0_59, %668 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_64 = torch.constant.int 0
    %int1_65 = torch.constant.int 1
    %670 = torch.aten.mm %669, %10 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %671 = torch.aten.add.Tensor %670, %11, %int1_65 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %672 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_66 = torch.constant.int 1
    %int7_67 = torch.constant.int 7
    %int6400 = torch.constant.int 6400
    %673 = torch.prim.ListConstruct %int1_66, %int7_67, %int6400 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %674 = torch.aten.reshape %671, %673 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %675 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %676 = torch.aten.mul.Tensor %674, %675 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %677 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %678 = torch.aten.pow.Tensor_Tensor %674, %677 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %679 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %680 = torch.aten.mul.Tensor %678, %679 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_68 = torch.constant.int 1
    %681 = torch.aten.add.Tensor %674, %680, %int1_68 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %682 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %683 = torch.aten.mul.Tensor %681, %682 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %684 = torch.aten.tanh %683 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %685 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_69 = torch.constant.int 1
    %686 = torch.aten.add.Tensor %684, %685, %int1_69 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %687 = torch.aten.mul.Tensor %676, %686 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %688 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_70 = torch.constant.int 7
    %int6400_71 = torch.constant.int 6400
    %689 = torch.prim.ListConstruct %int7_70, %int6400_71 : (!torch.int, !torch.int) -> !torch.list<int>
    %690 = torch.aten.reshape %687, %689 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_72 = torch.constant.int 0
    %int1_73 = torch.constant.int 1
    %691 = torch.aten.mm %690, %12 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %692 = torch.aten.add.Tensor %691, %13, %int1_73 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %693 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_74 = torch.constant.int 1
    %int7_75 = torch.constant.int 7
    %int1600_76 = torch.constant.int 1600
    %694 = torch.prim.ListConstruct %int1_74, %int7_75, %int1600_76 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %695 = torch.aten.reshape %692, %694 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_77 = torch.constant.int 1
    %696 = torch.aten.add.Tensor %665, %695, %int1_77 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_78 = torch.constant.float 9.9999997473787516E-6
    %int1600_79 = torch.constant.int 1600
    %697 = torch.prim.ListConstruct %int1600_79 : (!torch.int) -> !torch.list<int>
    %result0_80, %result1_81, %result2_82 = torch.aten.native_layer_norm %696, %697, %14, %15, %float9.999990e-06_78 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %698 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_83 = torch.constant.int 7
    %int1600_84 = torch.constant.int 1600
    %699 = torch.prim.ListConstruct %int7_83, %int1600_84 : (!torch.int, !torch.int) -> !torch.list<int>
    %700 = torch.aten.reshape %result0_80, %699 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_85 = torch.constant.int 0
    %int1_86 = torch.constant.int 1
    %701 = torch.aten.mm %700, %16 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %702 = torch.aten.add.Tensor %701, %17, %int1_86 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %703 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_87 = torch.constant.int 1
    %int7_88 = torch.constant.int 7
    %int4800_89 = torch.constant.int 4800
    %704 = torch.prim.ListConstruct %int1_87, %int7_88, %int4800_89 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %705 = torch.aten.reshape %702, %704 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %706 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %707 = torch.prim.tolist(%706) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_90 = torch.constant.int 2
    %708 = torch.aten.split_with_sizes %705, %707, %int2_90 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %709:3 = torch.prim.ListUnpack %708 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %710 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_91 = torch.constant.int 1
    %int7_92 = torch.constant.int 7
    %int25_93 = torch.constant.int 25
    %int64_94 = torch.constant.int 64
    %711 = torch.prim.ListConstruct %int1_91, %int7_92, %int25_93, %int64_94 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %712 = torch.aten.reshape %709#0, %711 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_95 = torch.constant.int 1
    %int2_96 = torch.constant.int 2
    %713 = torch.aten.transpose.int %712, %int1_95, %int2_96 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %714 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_97 = torch.constant.int 1
    %int7_98 = torch.constant.int 7
    %int25_99 = torch.constant.int 25
    %int64_100 = torch.constant.int 64
    %715 = torch.prim.ListConstruct %int1_97, %int7_98, %int25_99, %int64_100 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %716 = torch.aten.reshape %709#1, %715 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_101 = torch.constant.int 1
    %int2_102 = torch.constant.int 2
    %717 = torch.aten.transpose.int %716, %int1_101, %int2_102 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %718 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_103 = torch.constant.int 1
    %int7_104 = torch.constant.int 7
    %int25_105 = torch.constant.int 25
    %int64_106 = torch.constant.int 64
    %719 = torch.prim.ListConstruct %int1_103, %int7_104, %int25_105, %int64_106 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %720 = torch.aten.reshape %709#2, %719 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_107 = torch.constant.int 1
    %int2_108 = torch.constant.int 2
    %721 = torch.aten.transpose.int %720, %int1_107, %int2_108 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_109 = torch.constant.int 1
    %int2_110 = torch.constant.int 2
    %722 = torch.aten.transpose.int %716, %int1_109, %int2_110 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_111 = torch.constant.int 2
    %int3_112 = torch.constant.int 3
    %723 = torch.aten.transpose.int %722, %int2_111, %int3_112 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %724 = torch.aten.matmul %713, %723 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %725 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %726 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %727 = torch.aten.pow.Tensor_Tensor %725, %726 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %728 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_113 = torch.constant.int 0
    %729 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_114 = torch.constant.none
    %float0.000000e00_115 = torch.constant.float 0.000000e+00
    %730 = torch.aten.full %729, %float0.000000e00_115, %none_114, %none_114, %none_114, %none_114 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_116 = torch.constant.int 1
    %731 = torch.aten.add.Tensor %730, %727, %int1_116 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %732 = torch.aten.div.Tensor %724, %731 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %733 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_117 = torch.constant.int 6
    %none_118 = torch.constant.none
    %false_119 = torch.constant.bool false
    %734 = torch.aten.to.dtype %732, %int6_117, %false_119, %false_119, %none_118 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %735 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %736 = torch.aten.where.self %733, %734, %735 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_120 = torch.constant.int 3
    %none_121 = torch.constant.none
    %737 = torch.aten.softmax.int %736, %int3_120, %none_121 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_122 = torch.constant.int 6
    %none_123 = torch.constant.none
    %false_124 = torch.constant.bool false
    %738 = torch.aten.to.dtype %737, %int6_122, %false_124, %false_124, %none_123 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %739 = torch.aten.matmul %738, %721 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_125 = torch.constant.int 1
    %int2_126 = torch.constant.int 2
    %740 = torch.aten.transpose.int %739, %int1_125, %int2_126 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %741 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_127 = torch.constant.int 1
    %int7_128 = torch.constant.int 7
    %int1600_129 = torch.constant.int 1600
    %742 = torch.prim.ListConstruct %int1_127, %int7_128, %int1600_129 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %743 = torch.aten.reshape %740, %742 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %744 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_130 = torch.constant.int 7
    %int1600_131 = torch.constant.int 1600
    %745 = torch.prim.ListConstruct %int7_130, %int1600_131 : (!torch.int, !torch.int) -> !torch.list<int>
    %746 = torch.aten.reshape %743, %745 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_132 = torch.constant.int 0
    %int1_133 = torch.constant.int 1
    %747 = torch.aten.mm %746, %18 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %748 = torch.aten.add.Tensor %747, %19, %int1_133 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %749 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_134 = torch.constant.int 1
    %int7_135 = torch.constant.int 7
    %int1600_136 = torch.constant.int 1600
    %750 = torch.prim.ListConstruct %int1_134, %int7_135, %int1600_136 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %751 = torch.aten.reshape %748, %750 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_137 = torch.constant.int 1
    %752 = torch.aten.add.Tensor %751, %696, %int1_137 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_138 = torch.constant.float 9.9999997473787516E-6
    %int1600_139 = torch.constant.int 1600
    %753 = torch.prim.ListConstruct %int1600_139 : (!torch.int) -> !torch.list<int>
    %result0_140, %result1_141, %result2_142 = torch.aten.native_layer_norm %752, %753, %20, %21, %float9.999990e-06_138 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %754 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_143 = torch.constant.int 7
    %int1600_144 = torch.constant.int 1600
    %755 = torch.prim.ListConstruct %int7_143, %int1600_144 : (!torch.int, !torch.int) -> !torch.list<int>
    %756 = torch.aten.reshape %result0_140, %755 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_145 = torch.constant.int 0
    %int1_146 = torch.constant.int 1
    %757 = torch.aten.mm %756, %22 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %758 = torch.aten.add.Tensor %757, %23, %int1_146 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %759 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_147 = torch.constant.int 1
    %int7_148 = torch.constant.int 7
    %int6400_149 = torch.constant.int 6400
    %760 = torch.prim.ListConstruct %int1_147, %int7_148, %int6400_149 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %761 = torch.aten.reshape %758, %760 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %762 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %763 = torch.aten.mul.Tensor %761, %762 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %764 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %765 = torch.aten.pow.Tensor_Tensor %761, %764 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %766 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %767 = torch.aten.mul.Tensor %765, %766 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_150 = torch.constant.int 1
    %768 = torch.aten.add.Tensor %761, %767, %int1_150 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %769 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %770 = torch.aten.mul.Tensor %768, %769 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %771 = torch.aten.tanh %770 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %772 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_151 = torch.constant.int 1
    %773 = torch.aten.add.Tensor %771, %772, %int1_151 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %774 = torch.aten.mul.Tensor %763, %773 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %775 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_152 = torch.constant.int 7
    %int6400_153 = torch.constant.int 6400
    %776 = torch.prim.ListConstruct %int7_152, %int6400_153 : (!torch.int, !torch.int) -> !torch.list<int>
    %777 = torch.aten.reshape %774, %776 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_154 = torch.constant.int 0
    %int1_155 = torch.constant.int 1
    %778 = torch.aten.mm %777, %24 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %779 = torch.aten.add.Tensor %778, %25, %int1_155 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %780 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_156 = torch.constant.int 1
    %int7_157 = torch.constant.int 7
    %int1600_158 = torch.constant.int 1600
    %781 = torch.prim.ListConstruct %int1_156, %int7_157, %int1600_158 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %782 = torch.aten.reshape %779, %781 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_159 = torch.constant.int 1
    %783 = torch.aten.add.Tensor %752, %782, %int1_159 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %783 ([1,7,1600]) over its last dimension
    // (normalized shape = [1600]) with eps ~= 1e-5.
    // %26 and %27 are [1600] tensors defined outside this chunk; they sit in the
    // weight and bias operand positions of native_layer_norm.
    // Results: %result0_162 is the normalized tensor; %result1_163 / %result2_164
    // are the two [1,7,1] auxiliary outputs and are not referenced anywhere in
    // the visible part of the function.
    %float9.999990e-06_160 = torch.constant.float 9.9999997473787516E-6
    %int1600_161 = torch.constant.int 1600
    %784 = torch.prim.ListConstruct %int1600_161 : (!torch.int) -> !torch.list<int>
    %result0_162, %result1_163, %result2_164 = torch.aten.native_layer_norm %783, %784, %26, %27, %float9.999990e-06_160 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused projection 1600 -> 4800 followed by a 3-way split.
    //   1. Flatten the normalized activations [1,7,1600] -> [7,1600].
    //   2. %789 = %787 x %28 + %29, with %28 : [1600,4800] and %29 : [4800]
    //      (both defined outside this chunk).
    //   3. Reshape back to [1,7,4800] and split the last dimension into three
    //      [1,7,1600] slices (%796#0, %796#1, %796#2).
    // The shape literal %785 ([-1, 1600]) is not referenced in the visible code;
    // the reshape below takes the static list %786 = [7, 1600] instead.
    %785 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_165 = torch.constant.int 7
    %int1600_166 = torch.constant.int 1600
    %786 = torch.prim.ListConstruct %int7_165, %int1600_166 : (!torch.int, !torch.int) -> !torch.list<int>
    %787 = torch.aten.reshape %result0_162, %786 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_167 is not referenced in the visible code. %int1_168 is the `alpha`
    // operand of add.Tensor; with alpha = 1 the add is a plain bias addition
    // (the [4800] operand broadcasts over the 7 rows).
    %int0_167 = torch.constant.int 0
    %int1_168 = torch.constant.int 1
    %788 = torch.aten.mm %787, %28 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %789 = torch.aten.add.Tensor %788, %29, %int1_168 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Restore the leading batch dimension: [7,4800] -> [1,7,4800].
    // %790 duplicates the target shape as a tensor literal and is not referenced
    // in the visible code.
    %790 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_169 = torch.constant.int 1
    %int7_170 = torch.constant.int 7
    %int4800_171 = torch.constant.int 4800
    %791 = torch.prim.ListConstruct %int1_169, %int7_170, %int4800_171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %792 = torch.aten.reshape %789, %791 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // dense<1600> on a 3-element tensor is a splat, i.e. split sizes
    // [1600, 1600, 1600]; the split runs along dim 2 (the 4800-wide dimension).
    %793 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %794 = torch.prim.tolist(%793) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_172 = torch.constant.int 2
    %795 = torch.aten.split_with_sizes %792, %794, %int2_172 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %796:3 = torch.prim.ListUnpack %795 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Rearrange each of the three [1,7,1600] slices into per-head layout:
    // reshape to [1,7,25,64] (1600 = 25 * 64), then swap dims 1 and 2 to obtain
    // [1,25,7,64].
    // How the results are consumed later in this chunk:
    //   %800 (from %796#0) - left operand of the score matmul (query role)
    //   %803 (from %796#1) - re-transposed below to form the right operand of
    //                        the score matmul (key role)
    //   %808 (from %796#2) - multiplied by the softmax output (value role)
    // The shape literals %797, %801 and %805 are not referenced in the visible
    // code; the reshapes take the ListConstruct values instead.
    %797 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_173 = torch.constant.int 1
    %int7_174 = torch.constant.int 7
    %int25_175 = torch.constant.int 25
    %int64_176 = torch.constant.int 64
    %798 = torch.prim.ListConstruct %int1_173, %int7_174, %int25_175, %int64_176 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %799 = torch.aten.reshape %796#0, %798 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_177 = torch.constant.int 1
    %int2_178 = torch.constant.int 2
    %800 = torch.aten.transpose.int %799, %int1_177, %int2_178 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second slice. %804 is not used by any op in the visible chunk.
    // NOTE(review): it has the same [1,25,7,64] type as the function's extra
    // results, so it is presumably returned - confirm at the func return.
    %801 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_179 = torch.constant.int 1
    %int7_180 = torch.constant.int 7
    %int25_181 = torch.constant.int 25
    %int64_182 = torch.constant.int 64
    %802 = torch.prim.ListConstruct %int1_179, %int7_180, %int25_181, %int64_182 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %803 = torch.aten.reshape %796#1, %802 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_183 = torch.constant.int 1
    %int2_184 = torch.constant.int 2
    %804 = torch.aten.transpose.int %803, %int1_183, %int2_184 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third slice.
    %805 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_185 = torch.constant.int 1
    %int7_186 = torch.constant.int 7
    %int25_187 = torch.constant.int 25
    %int64_188 = torch.constant.int 64
    %806 = torch.prim.ListConstruct %int1_185, %int7_186, %int25_187, %int64_188 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %807 = torch.aten.reshape %796#2, %806 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_189 = torch.constant.int 1
    %int2_190 = torch.constant.int 2
    %808 = torch.aten.transpose.int %807, %int1_189, %int2_190 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: softmax(mask(%800 x transpose(%809) / sqrt(64))) x %808.
    //
    // %809 applies the same transpose (dims 1,2) to %803 that already produced
    // %804; it is recomputed here rather than reused. Swapping dims 2 and 3
    // afterwards gives the [1,25,64,7] right operand for the score matmul.
    %int1_191 = torch.constant.int 1
    %int2_192 = torch.constant.int 2
    %809 = torch.aten.transpose.int %803, %int1_191, %int2_192 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_193 = torch.constant.int 2
    %int3_194 = torch.constant.int 3
    %810 = torch.aten.transpose.int %809, %int2_193, %int3_194 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores per head: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %811 = torch.aten.matmul %800, %810 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor %814 = 64 ^ 0.5 (= 8, the square root of the per-head width),
    // computed at runtime from two scalar literals.
    %812 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %813 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %814 = torch.aten.pow.Tensor_Tensor %812, %813 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %817 is a 0-d tensor filled with 0.0 and %818 = %817 + 1 * %814, so %818
    // carries the same value as %814. %815 (empty shape literal) and %int0_195
    // are not referenced in the visible code.
    %815 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_195 = torch.constant.int 0
    %816 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_196 = torch.constant.none
    %float0.000000e00_197 = torch.constant.float 0.000000e+00
    %817 = torch.aten.full %816, %float0.000000e00_197, %none_196, %none_196, %none_196, %none_196 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_198 = torch.constant.int 1
    %818 = torch.aten.add.Tensor %817, %814, %int1_198 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores.
    %819 = torch.aten.div.Tensor %811, %818 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump
    // (dense_resource<__elided__>), so the actual pattern is not visible here.
    // NOTE(review): presumably the causal lower-triangular mask - confirm
    // against the un-elided model.
    %820 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6 and non_blocking = copy = false. Input and
    // result types are both f32, so no element type change happens here.
    %int6_199 = torch.constant.int 6
    %none_200 = torch.constant.none
    %false_201 = torch.constant.bool false
    %821 = torch.aten.to.dtype %819, %int6_199, %false_201, %false_201, %none_200 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise substitute
    // -3.40282347E+38 (the most negative finite f32). The [1,1,7,7] mask
    // broadcasts across the 25 heads.
    %822 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %823 = torch.aten.where.self %820, %821, %822 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last dimension); the dtype operand is none.
    %int3_202 = torch.constant.int 3
    %none_203 = torch.constant.none
    %824 = torch.aten.softmax.int %823, %int3_202, %none_203 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Another f32 -> f32 to.dtype (same operand pattern as %821).
    %int6_204 = torch.constant.int 6
    %none_205 = torch.constant.none
    %false_206 = torch.constant.bool false
    %825 = torch.aten.to.dtype %824, %int6_204, %false_206, %false_206, %none_205 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %826 = torch.aten.matmul %825, %808 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the heads back together, apply a 1600 -> 1600 projection, and add
    // the residual.
    //
    // Undo the per-head layout: swap dims 1 and 2 ([1,25,7,64] -> [1,7,25,64])
    // and collapse the last two dims into 1600.
    %int1_207 = torch.constant.int 1
    %int2_208 = torch.constant.int 2
    %827 = torch.aten.transpose.int %826, %int1_207, %int2_208 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %828 is a tensor-literal copy of the target shape and is not referenced in
    // the visible code.
    %828 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_209 = torch.constant.int 1
    %int7_210 = torch.constant.int 7
    %int1600_211 = torch.constant.int 1600
    %829 = torch.prim.ListConstruct %int1_209, %int7_210, %int1600_211 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %830 = torch.aten.reshape %827, %829 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Flatten to 2-D for the matrix multiply: [1,7,1600] -> [7,1600].
    // %831 ([-1, 1600]) is not referenced in the visible code.
    %831 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_212 = torch.constant.int 7
    %int1600_213 = torch.constant.int 1600
    %832 = torch.prim.ListConstruct %int7_212, %int1600_213 : (!torch.int, !torch.int) -> !torch.list<int>
    %833 = torch.aten.reshape %830, %832 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %835 = %833 x %30 + %31, with %30 : [1600,1600] and %31 : [1600] defined
    // outside this chunk. %int0_214 is not referenced in the visible code;
    // %int1_215 is the add's alpha (= 1).
    %int0_214 = torch.constant.int 0
    %int1_215 = torch.constant.int 1
    %834 = torch.aten.mm %833, %30 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %835 = torch.aten.add.Tensor %834, %31, %int1_215 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the batch dimension: [7,1600] -> [1,7,1600]. %836 is not
    // referenced in the visible code.
    %836 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_216 = torch.constant.int 1
    %int7_217 = torch.constant.int 7
    %int1600_218 = torch.constant.int 1600
    %837 = torch.prim.ListConstruct %int1_216, %int7_217, %int1600_218 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %838 = torch.aten.reshape %835, %837 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add %783, the tensor that was fed to the preceding
    // layer norm (alpha = 1).
    %int1_219 = torch.constant.int 1
    %839 = torch.aten.add.Tensor %838, %783, %int1_219 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %839 ([1,7,1600]) over its last dimension
    // (normalized shape = [1600]) with eps ~= 1e-5.
    // %32 and %33 are [1600] tensors defined outside this chunk; they sit in the
    // weight and bias operand positions of native_layer_norm.
    // Only %result0_222 is consumed in the visible code; %result1_223 and
    // %result2_224 (the two [1,7,1] auxiliary outputs) are not referenced there.
    %float9.999990e-06_220 = torch.constant.float 9.9999997473787516E-6
    %int1600_221 = torch.constant.int 1600
    %840 = torch.prim.ListConstruct %int1600_221 : (!torch.int) -> !torch.list<int>
    %result0_222, %result1_223, %result2_224 = torch.aten.native_layer_norm %839, %840, %32, %33, %float9.999990e-06_220 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Feed-forward expansion 1600 -> 6400.
    //   %843 : normalized activations flattened to [7,1600]
    //   %845 = %843 x %34 + %35, with %34 : [1600,6400] and %35 : [6400]
    //          (both defined outside this chunk)
    //   %848 : result reshaped back to [1,7,6400]
    // The shape literals %841 and %846 and the constant %int0_227 are not
    // referenced in the visible code.
    %841 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_225 = torch.constant.int 7
    %int1600_226 = torch.constant.int 1600
    %842 = torch.prim.ListConstruct %int7_225, %int1600_226 : (!torch.int, !torch.int) -> !torch.list<int>
    %843 = torch.aten.reshape %result0_222, %842 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_227 = torch.constant.int 0
    // %int1_228 is the alpha operand of the bias add (alpha = 1).
    %int1_228 = torch.constant.int 1
    %844 = torch.aten.mm %843, %34 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %845 = torch.aten.add.Tensor %844, %35, %int1_228 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %846 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_229 = torch.constant.int 1
    %int7_230 = torch.constant.int 7
    %int6400_231 = torch.constant.int 6400
    %847 = torch.prim.ListConstruct %int1_229, %int7_230, %int6400_231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %848 = torch.aten.reshape %845, %847 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %848, spelled out as primitive ops:
    //   %861 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh form of the GELU approximation (0.797884583 ~= sqrt(2/pi)).
    //
    // %850 = 0.5 * x
    %849 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %850 = torch.aten.mul.Tensor %848, %849 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %852 = x ^ 3 (the exponent is a 0-d f32 tensor)
    %851 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %852 = torch.aten.pow.Tensor_Tensor %848, %851 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %854 = 0.044715 * x^3
    %853 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %854 = torch.aten.mul.Tensor %852, %853 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %855 = x + 0.044715 * x^3 (alpha = 1)
    %int1_232 = torch.constant.int 1
    %855 = torch.aten.add.Tensor %848, %854, %int1_232 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %858 = tanh(0.797884583 * %855)
    %856 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %857 = torch.aten.mul.Tensor %855, %856 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %858 = torch.aten.tanh %857 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %860 = 1 + tanh(...) (alpha = 1)
    %859 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_233 = torch.constant.int 1
    %860 = torch.aten.add.Tensor %858, %859, %int1_233 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Final product of the 0.5 * x term and the (1 + tanh(...)) term.
    %861 = torch.aten.mul.Tensor %850, %860 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Feed-forward contraction 6400 -> 1600 plus the second residual add.
    //   %864 : activation output flattened to [7,6400]
    //   %866 = %864 x %36 + %37, with %36 : [6400,1600] and %37 : [1600]
    //          (both defined outside this chunk)
    //   %869 : result reshaped back to [1,7,1600]
    //   %870 = %839 + %869, where %839 is the tensor that entered the preceding
    //          layer norm
    // The shape literals %862 and %867 and the constant %int0_236 are not
    // referenced in the visible code.
    %862 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_234 = torch.constant.int 7
    %int6400_235 = torch.constant.int 6400
    %863 = torch.prim.ListConstruct %int7_234, %int6400_235 : (!torch.int, !torch.int) -> !torch.list<int>
    %864 = torch.aten.reshape %861, %863 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_236 = torch.constant.int 0
    // %int1_237 is the alpha operand of the bias add (alpha = 1).
    %int1_237 = torch.constant.int 1
    %865 = torch.aten.mm %864, %36 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %866 = torch.aten.add.Tensor %865, %37, %int1_237 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %867 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_238 = torch.constant.int 1
    %int7_239 = torch.constant.int 7
    %int1600_240 = torch.constant.int 1600
    %868 = torch.prim.ListConstruct %int1_238, %int7_239, %int1600_240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %869 = torch.aten.reshape %866, %868 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection (alpha = 1).
    %int1_241 = torch.constant.int 1
    %870 = torch.aten.add.Tensor %839, %869, %int1_241 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_242 = torch.constant.float 9.9999997473787516E-6
    %int1600_243 = torch.constant.int 1600
    %871 = torch.prim.ListConstruct %int1600_243 : (!torch.int) -> !torch.list<int>
    %result0_244, %result1_245, %result2_246 = torch.aten.native_layer_norm %870, %871, %38, %39, %float9.999990e-06_242 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %872 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_247 = torch.constant.int 7
    %int1600_248 = torch.constant.int 1600
    %873 = torch.prim.ListConstruct %int7_247, %int1600_248 : (!torch.int, !torch.int) -> !torch.list<int>
    %874 = torch.aten.reshape %result0_244, %873 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_249 = torch.constant.int 0
    %int1_250 = torch.constant.int 1
    %875 = torch.aten.mm %874, %40 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %876 = torch.aten.add.Tensor %875, %41, %int1_250 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %877 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_251 = torch.constant.int 1
    %int7_252 = torch.constant.int 7
    %int4800_253 = torch.constant.int 4800
    %878 = torch.prim.ListConstruct %int1_251, %int7_252, %int4800_253 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %879 = torch.aten.reshape %876, %878 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %880 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %881 = torch.prim.tolist(%880) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_254 = torch.constant.int 2
    %882 = torch.aten.split_with_sizes %879, %881, %int2_254 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %883:3 = torch.prim.ListUnpack %882 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %884 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_255 = torch.constant.int 1
    %int7_256 = torch.constant.int 7
    %int25_257 = torch.constant.int 25
    %int64_258 = torch.constant.int 64
    %885 = torch.prim.ListConstruct %int1_255, %int7_256, %int25_257, %int64_258 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %886 = torch.aten.reshape %883#0, %885 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_259 = torch.constant.int 1
    %int2_260 = torch.constant.int 2
    %887 = torch.aten.transpose.int %886, %int1_259, %int2_260 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %888 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_261 = torch.constant.int 1
    %int7_262 = torch.constant.int 7
    %int25_263 = torch.constant.int 25
    %int64_264 = torch.constant.int 64
    %889 = torch.prim.ListConstruct %int1_261, %int7_262, %int25_263, %int64_264 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %890 = torch.aten.reshape %883#1, %889 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_265 = torch.constant.int 1
    %int2_266 = torch.constant.int 2
    %891 = torch.aten.transpose.int %890, %int1_265, %int2_266 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %892 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_267 = torch.constant.int 1
    %int7_268 = torch.constant.int 7
    %int25_269 = torch.constant.int 25
    %int64_270 = torch.constant.int 64
    %893 = torch.prim.ListConstruct %int1_267, %int7_268, %int25_269, %int64_270 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %894 = torch.aten.reshape %883#2, %893 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_271 = torch.constant.int 1
    %int2_272 = torch.constant.int 2
    %895 = torch.aten.transpose.int %894, %int1_271, %int2_272 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_273 = torch.constant.int 1
    %int2_274 = torch.constant.int 2
    %896 = torch.aten.transpose.int %890, %int1_273, %int2_274 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_275 = torch.constant.int 2
    %int3_276 = torch.constant.int 3
    %897 = torch.aten.transpose.int %896, %int2_275, %int3_276 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %898 = torch.aten.matmul %887, %897 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %899 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %900 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %901 = torch.aten.pow.Tensor_Tensor %899, %900 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %902 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_277 = torch.constant.int 0
    %903 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_278 = torch.constant.none
    %float0.000000e00_279 = torch.constant.float 0.000000e+00
    %904 = torch.aten.full %903, %float0.000000e00_279, %none_278, %none_278, %none_278, %none_278 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_280 = torch.constant.int 1
    %905 = torch.aten.add.Tensor %904, %901, %int1_280 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %906 = torch.aten.div.Tensor %898, %905 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %907 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_281 = torch.constant.int 6
    %none_282 = torch.constant.none
    %false_283 = torch.constant.bool false
    %908 = torch.aten.to.dtype %906, %int6_281, %false_283, %false_283, %none_282 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %909 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %910 = torch.aten.where.self %907, %908, %909 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_284 = torch.constant.int 3
    %none_285 = torch.constant.none
    %911 = torch.aten.softmax.int %910, %int3_284, %none_285 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_286 = torch.constant.int 6
    %none_287 = torch.constant.none
    %false_288 = torch.constant.bool false
    %912 = torch.aten.to.dtype %911, %int6_286, %false_288, %false_288, %none_287 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %913 = torch.aten.matmul %912, %895 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_289 = torch.constant.int 1
    %int2_290 = torch.constant.int 2
    %914 = torch.aten.transpose.int %913, %int1_289, %int2_290 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %915 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_291 = torch.constant.int 1
    %int7_292 = torch.constant.int 7
    %int1600_293 = torch.constant.int 1600
    %916 = torch.prim.ListConstruct %int1_291, %int7_292, %int1600_293 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %917 = torch.aten.reshape %914, %916 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %918 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_294 = torch.constant.int 7
    %int1600_295 = torch.constant.int 1600
    %919 = torch.prim.ListConstruct %int7_294, %int1600_295 : (!torch.int, !torch.int) -> !torch.list<int>
    %920 = torch.aten.reshape %917, %919 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_296 = torch.constant.int 0
    %int1_297 = torch.constant.int 1
    %921 = torch.aten.mm %920, %42 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %922 = torch.aten.add.Tensor %921, %43, %int1_297 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %923 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_298 = torch.constant.int 1
    %int7_299 = torch.constant.int 7
    %int1600_300 = torch.constant.int 1600
    %924 = torch.prim.ListConstruct %int1_298, %int7_299, %int1600_300 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %925 = torch.aten.reshape %922, %924 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_301 = torch.constant.int 1
    %926 = torch.aten.add.Tensor %925, %870, %int1_301 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Feed-forward sub-block: input %926, output %957 ----
    // Layer norm of %926 over the trailing dimension (normalized shape [1600]) with
    // weight %44, bias %45 (both defined outside this chunk) and eps 9.9999997473787516E-6.
    // Only %result0_304 is consumed in the visible lines.
    %float9.999990e-06_302 = torch.constant.float 9.9999997473787516E-6
    %int1600_303 = torch.constant.int 1600
    %927 = torch.prim.ListConstruct %int1600_303 : (!torch.int) -> !torch.list<int>
    %result0_304, %result1_305, %result2_306 = torch.aten.native_layer_norm %926, %927, %44, %45, %float9.999990e-06_302 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; %928 is not referenced in the visible lines — the reshape
    // below takes the static list %929 = [7, 1600] instead.
    %928 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_307 = torch.constant.int 7
    %int1600_308 = torch.constant.int 1600
    %929 = torch.prim.ListConstruct %int7_307, %int1600_308 : (!torch.int, !torch.int) -> !torch.list<int>
    // Drop the leading unit dimension: [1,7,1600] -> [7,1600] so a 2-D mm can be used.
    %930 = torch.aten.reshape %result0_304, %929 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_309 is not referenced in the visible lines; %int1_310 is the alpha of the bias add.
    %int0_309 = torch.constant.int 0
    %int1_310 = torch.constant.int 1
    // Expansion: [7,1600] x %46 [1600,6400], then add the [6400] operand %47 (alpha = 1).
    %931 = torch.aten.mm %930, %46 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %932 = torch.aten.add.Tensor %931, %47, %int1_310 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %933 (shape literal [1, 7, 6400]) is not referenced in the visible lines; the reshape uses %934.
    %933 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_311 = torch.constant.int 1
    %int7_312 = torch.constant.int 7
    %int6400_313 = torch.constant.int 6400
    %934 = torch.prim.ListConstruct %int1_311, %int7_312, %int6400_313 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading unit dimension: x = %935 with shape [1,7,6400].
    %935 = torch.aten.reshape %932, %934 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation spelled out op by op; the sequence below evaluates
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh form of GELU (0.797884583 ~= sqrt(2/pi)).
    // %937 = 0.5 * x
    %936 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %937 = torch.aten.mul.Tensor %935, %936 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %939 = x^3 (pow with a 0-d tensor exponent of 3.0)
    %938 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %939 = torch.aten.pow.Tensor_Tensor %935, %938 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %941 = 0.044715 * x^3
    %940 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %941 = torch.aten.mul.Tensor %939, %940 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %942 = x + 0.044715 * x^3
    %int1_314 = torch.constant.int 1
    %942 = torch.aten.add.Tensor %935, %941, %int1_314 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %945 = tanh(0.797884583 * %942)
    %943 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %944 = torch.aten.mul.Tensor %942, %943 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %945 = torch.aten.tanh %944 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %947 = 1 + tanh(...); %948 = (0.5 * x) * (1 + tanh(...))
    %946 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_315 = torch.constant.int 1
    %947 = torch.aten.add.Tensor %945, %946, %int1_315 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %948 = torch.aten.mul.Tensor %937, %947 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %949 (shape literal [-1, 6400]) is not referenced in the visible lines; the reshape uses %950.
    %949 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_316 = torch.constant.int 7
    %int6400_317 = torch.constant.int 6400
    %950 = torch.prim.ListConstruct %int7_316, %int6400_317 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,6400] for the second 2-D mm.
    %951 = torch.aten.reshape %948, %950 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_318 is not referenced in the visible lines; %int1_319 is the alpha of the bias add.
    %int0_318 = torch.constant.int 0
    %int1_319 = torch.constant.int 1
    // Contraction: [7,6400] x %48 [6400,1600], then add the [1600] operand %49 (alpha = 1).
    %952 = torch.aten.mm %951, %48 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %953 = torch.aten.add.Tensor %952, %49, %int1_319 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %954 (shape literal [1, 7, 1600]) is not referenced in the visible lines; the reshape uses %955.
    %954 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_320 = torch.constant.int 1
    %int7_321 = torch.constant.int 7
    %int1600_322 = torch.constant.int 1600
    %955 = torch.prim.ListConstruct %int1_320, %int7_321, %int1600_322 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %956 = torch.aten.reshape %953, %955 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add the sub-block result back onto its pre-norm input %926.
    %int1_323 = torch.constant.int 1
    %957 = torch.aten.add.Tensor %926, %956, %int1_323 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Attention sub-block: input %957, output %1013 ----
    // Layer norm of %957 over the trailing dimension (normalized shape [1600]) with
    // weight %50, bias %51 (both defined outside this chunk) and eps 9.9999997473787516E-6.
    // Only %result0_326 is consumed in the visible lines.
    %float9.999990e-06_324 = torch.constant.float 9.9999997473787516E-6
    %int1600_325 = torch.constant.int 1600
    %958 = torch.prim.ListConstruct %int1600_325 : (!torch.int) -> !torch.list<int>
    %result0_326, %result1_327, %result2_328 = torch.aten.native_layer_norm %957, %958, %50, %51, %float9.999990e-06_324 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %959 (shape literal [-1, 1600]) is not referenced in the visible lines; the reshape uses %960.
    %959 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_329 = torch.constant.int 7
    %int1600_330 = torch.constant.int 1600
    %960 = torch.prim.ListConstruct %int7_329, %int1600_330 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the fused projection.
    %961 = torch.aten.reshape %result0_326, %960 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_331 is not referenced in the visible lines; %int1_332 is the alpha of the bias add.
    %int0_331 = torch.constant.int 0
    %int1_332 = torch.constant.int 1
    // Fused projection: [7,1600] x %52 [1600,4800] plus the [4800] operand %53; 4800 = 3 * 1600.
    %962 = torch.aten.mm %961, %52 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %963 = torch.aten.add.Tensor %962, %53, %int1_332 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // %964 (shape literal [1, 7, 4800]) is not referenced in the visible lines; the reshape uses %965.
    %964 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_333 = torch.constant.int 1
    %int7_334 = torch.constant.int 7
    %int4800_335 = torch.constant.int 4800
    %965 = torch.prim.ListConstruct %int1_333, %int7_334, %int4800_335 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %966 = torch.aten.reshape %963, %965 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat literal (three elements, each 1600) converted to a list;
    // the split runs along dim 2 and yields three [1,7,1600] tensors (%970#0, #1, #2).
    %967 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %968 = torch.prim.tolist(%967) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_336 = torch.constant.int 2
    %969 = torch.aten.split_with_sizes %966, %968, %int2_336 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %970:3 = torch.prim.ListUnpack %969 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0: view 1600 as 25 x 64, then swap dims 1 and 2 -> %974 [1,25,7,64].
    // %974 is the left operand of the score matmul below (query role).
    // %971 (shape literal) is not referenced in the visible lines; the reshape uses %972.
    %971 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_337 = torch.constant.int 1
    %int7_338 = torch.constant.int 7
    %int25_339 = torch.constant.int 25
    %int64_340 = torch.constant.int 64
    %972 = torch.prim.ListConstruct %int1_337, %int7_338, %int25_339, %int64_340 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %973 = torch.aten.reshape %970#0, %972 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_341 = torch.constant.int 1
    %int2_342 = torch.constant.int 2
    %974 = torch.aten.transpose.int %973, %int1_341, %int2_342 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1: same reshape + transpose -> %978 [1,25,7,64] (key role).
    // NOTE(review): %978 is not consumed in the visible lines; presumably it is one of the
    // function's [1,25,7,64] results — confirm against the return op.
    // %975 (shape literal) is not referenced in the visible lines; the reshape uses %976.
    %975 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_343 = torch.constant.int 1
    %int7_344 = torch.constant.int 7
    %int25_345 = torch.constant.int 25
    %int64_346 = torch.constant.int 64
    %976 = torch.prim.ListConstruct %int1_343, %int7_344, %int25_345, %int64_346 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %977 = torch.aten.reshape %970#1, %976 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_347 = torch.constant.int 1
    %int2_348 = torch.constant.int 2
    %978 = torch.aten.transpose.int %977, %int1_347, %int2_348 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2: same reshape + transpose -> %982 [1,25,7,64]; right operand of the
    // second matmul below (value role).
    // %979 (shape literal) is not referenced in the visible lines; the reshape uses %980.
    %979 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_349 = torch.constant.int 1
    %int7_350 = torch.constant.int 7
    %int25_351 = torch.constant.int 25
    %int64_352 = torch.constant.int 64
    %980 = torch.prim.ListConstruct %int1_349, %int7_350, %int25_351, %int64_352 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %981 = torch.aten.reshape %970#2, %980 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_353 = torch.constant.int 1
    %int2_354 = torch.constant.int 2
    %982 = torch.aten.transpose.int %981, %int1_353, %int2_354 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // %983 repeats the dim-1/dim-2 transpose of %977 (same input and dims as %978);
    // %984 then swaps dims 2 and 3 to get [1,25,64,7] for the score matmul.
    %int1_355 = torch.constant.int 1
    %int2_356 = torch.constant.int 2
    %983 = torch.aten.transpose.int %977, %int1_355, %int2_356 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_357 = torch.constant.int 2
    %int3_358 = torch.constant.int 3
    %984 = torch.aten.transpose.int %983, %int2_357, %int3_358 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %985 = torch.aten.matmul %974, %984 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: %988 = 64 ^ 0.5, computed with tensor ops on 0-d literals.
    %986 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %987 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %988 = torch.aten.pow.Tensor_Tensor %986, %987 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %989 (empty shape literal) and %int0_359 are not referenced in the visible lines.
    %989 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_359 = torch.constant.int 0
    // %991 is a 0-d tensor filled with 0.0 (empty size list); %992 = 0.0 + %988, so the
    // divisor below has the same value as %988.
    %990 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_360 = torch.constant.none
    %float0.000000e00_361 = torch.constant.float 0.000000e+00
    %991 = torch.aten.full %990, %float0.000000e00_361, %none_360, %none_360, %none_360, %none_360 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_362 = torch.constant.int 1
    %992 = torch.aten.add.Tensor %991, %988, %int1_362 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores: %985 / 64^0.5.
    %993 = torch.aten.div.Tensor %985, %992 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its values are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — cannot be confirmed from here.
    %994 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the element
    // type is unchanged by this op.
    %int6_363 = torch.constant.int 6
    %none_364 = torch.constant.none
    %false_365 = torch.constant.bool false
    %995 = torch.aten.to.dtype %993, %int6_363, %false_365, %false_365, %none_364 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true, otherwise substitute -3.40282347E+38
    // (the most negative finite f32), so those positions vanish after softmax.
    %996 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %997 = torch.aten.where.self %994, %995, %996 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis of the [1,25,7,7] scores); dtype argument is none.
    %int3_366 = torch.constant.int 3
    %none_367 = torch.constant.none
    %998 = torch.aten.softmax.int %997, %int3_366, %none_367 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype (code 6); again f32 -> f32 per the op's types.
    %int6_368 = torch.constant.int 6
    %none_369 = torch.constant.none
    %false_370 = torch.constant.bool false
    %999 = torch.aten.to.dtype %998, %int6_368, %false_370, %false_370, %none_369 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x %982 [1,25,7,64] -> [1,25,7,64].
    %1000 = torch.aten.matmul %999, %982 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the head layout: swap dims 1 and 2 back, then merge 25 x 64 into 1600.
    %int1_371 = torch.constant.int 1
    %int2_372 = torch.constant.int 2
    %1001 = torch.aten.transpose.int %1000, %int1_371, %int2_372 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %1002 (shape literal) is not referenced in the visible lines; the reshape uses %1003.
    %1002 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_373 = torch.constant.int 1
    %int7_374 = torch.constant.int 7
    %int1600_375 = torch.constant.int 1600
    %1003 = torch.prim.ListConstruct %int1_373, %int7_374, %int1600_375 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1004 = torch.aten.reshape %1001, %1003 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %1005 (shape literal) is not referenced in the visible lines; the reshape uses %1006.
    %1005 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_376 = torch.constant.int 7
    %int1600_377 = torch.constant.int 1600
    %1006 = torch.prim.ListConstruct %int7_376, %int1600_377 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the output projection.
    %1007 = torch.aten.reshape %1004, %1006 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_378 is not referenced in the visible lines; %int1_379 is the alpha of the bias add.
    %int0_378 = torch.constant.int 0
    %int1_379 = torch.constant.int 1
    // Output projection: [7,1600] x %54 [1600,1600] plus the [1600] operand %55 (alpha = 1).
    %1008 = torch.aten.mm %1007, %54 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1009 = torch.aten.add.Tensor %1008, %55, %int1_379 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %1010 (shape literal) is not referenced in the visible lines; the reshape uses %1011.
    %1010 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_380 = torch.constant.int 1
    %int7_381 = torch.constant.int 7
    %int1600_382 = torch.constant.int 1600
    %1011 = torch.prim.ListConstruct %int1_380, %int7_381, %int1600_382 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1012 = torch.aten.reshape %1009, %1011 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add the projected result onto the pre-norm input %957.
    %int1_383 = torch.constant.int 1
    %1013 = torch.aten.add.Tensor %1012, %957, %int1_383 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Feed-forward sub-block: input %1013, output %1044 ----
    // Layer norm of %1013 over the trailing dimension (normalized shape [1600]) with
    // weight %56, bias %57 (both defined outside this chunk) and eps 9.9999997473787516E-6.
    // Only %result0_386 is consumed in the visible lines.
    %float9.999990e-06_384 = torch.constant.float 9.9999997473787516E-6
    %int1600_385 = torch.constant.int 1600
    %1014 = torch.prim.ListConstruct %int1600_385 : (!torch.int) -> !torch.list<int>
    %result0_386, %result1_387, %result2_388 = torch.aten.native_layer_norm %1013, %1014, %56, %57, %float9.999990e-06_384 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %1015 (shape literal [-1, 1600]) is not referenced in the visible lines; the reshape uses %1016.
    %1015 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_389 = torch.constant.int 7
    %int1600_390 = torch.constant.int 1600
    %1016 = torch.prim.ListConstruct %int7_389, %int1600_390 : (!torch.int, !torch.int) -> !torch.list<int>
    // Drop the leading unit dimension: [1,7,1600] -> [7,1600] so a 2-D mm can be used.
    %1017 = torch.aten.reshape %result0_386, %1016 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_391 is not referenced in the visible lines; %int1_392 is the alpha of the bias add.
    %int0_391 = torch.constant.int 0
    %int1_392 = torch.constant.int 1
    // Expansion: [7,1600] x %58 [1600,6400], then add the [6400] operand %59 (alpha = 1).
    %1018 = torch.aten.mm %1017, %58 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1019 = torch.aten.add.Tensor %1018, %59, %int1_392 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %1020 (shape literal [1, 7, 6400]) is not referenced in the visible lines; the reshape uses %1021.
    %1020 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_393 = torch.constant.int 1
    %int7_394 = torch.constant.int 7
    %int6400_395 = torch.constant.int 6400
    %1021 = torch.prim.ListConstruct %int1_393, %int7_394, %int6400_395 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading unit dimension: x = %1022 with shape [1,7,6400].
    %1022 = torch.aten.reshape %1019, %1021 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation spelled out op by op; the sequence below evaluates
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh form of GELU (0.797884583 ~= sqrt(2/pi)).
    // %1024 = 0.5 * x
    %1023 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1024 = torch.aten.mul.Tensor %1022, %1023 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1026 = x^3 (pow with a 0-d tensor exponent of 3.0)
    %1025 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1026 = torch.aten.pow.Tensor_Tensor %1022, %1025 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1028 = 0.044715 * x^3
    %1027 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1028 = torch.aten.mul.Tensor %1026, %1027 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1029 = x + 0.044715 * x^3
    %int1_396 = torch.constant.int 1
    %1029 = torch.aten.add.Tensor %1022, %1028, %int1_396 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %1032 = tanh(0.797884583 * %1029)
    %1030 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1031 = torch.aten.mul.Tensor %1029, %1030 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1032 = torch.aten.tanh %1031 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1034 = 1 + tanh(...); %1035 = (0.5 * x) * (1 + tanh(...))
    %1033 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_397 = torch.constant.int 1
    %1034 = torch.aten.add.Tensor %1032, %1033, %int1_397 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1035 = torch.aten.mul.Tensor %1024, %1034 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1036 (shape literal [-1, 6400]) is not referenced in the visible lines; the reshape uses %1037.
    %1036 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_398 = torch.constant.int 7
    %int6400_399 = torch.constant.int 6400
    %1037 = torch.prim.ListConstruct %int7_398, %int6400_399 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,6400] for the second 2-D mm.
    %1038 = torch.aten.reshape %1035, %1037 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_400 is not referenced in the visible lines; %int1_401 is the alpha of the bias add.
    %int0_400 = torch.constant.int 0
    %int1_401 = torch.constant.int 1
    // Contraction: [7,6400] x %60 [6400,1600], then add the [1600] operand %61 (alpha = 1).
    %1039 = torch.aten.mm %1038, %60 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1040 = torch.aten.add.Tensor %1039, %61, %int1_401 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %1041 (shape literal [1, 7, 1600]) is not referenced in the visible lines; the reshape uses %1042.
    %1041 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_402 = torch.constant.int 1
    %int7_403 = torch.constant.int 7
    %int1600_404 = torch.constant.int 1600
    %1042 = torch.prim.ListConstruct %int1_402, %int7_403, %int1600_404 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1043 = torch.aten.reshape %1040, %1042 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add the sub-block result back onto its pre-norm input %1013.
    %int1_405 = torch.constant.int 1
    %1044 = torch.aten.add.Tensor %1013, %1043, %int1_405 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Attention sub-block: input %1044, output %1100 ----
    // Layer norm of %1044 over the trailing dimension (normalized shape [1600]) with
    // weight %62, bias %63 (both defined outside this chunk) and eps 9.9999997473787516E-6.
    // Only %result0_408 is consumed in the visible lines.
    %float9.999990e-06_406 = torch.constant.float 9.9999997473787516E-6
    %int1600_407 = torch.constant.int 1600
    %1045 = torch.prim.ListConstruct %int1600_407 : (!torch.int) -> !torch.list<int>
    %result0_408, %result1_409, %result2_410 = torch.aten.native_layer_norm %1044, %1045, %62, %63, %float9.999990e-06_406 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %1046 (shape literal [-1, 1600]) is not referenced in the visible lines; the reshape uses %1047.
    %1046 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_411 = torch.constant.int 7
    %int1600_412 = torch.constant.int 1600
    %1047 = torch.prim.ListConstruct %int7_411, %int1600_412 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the fused projection.
    %1048 = torch.aten.reshape %result0_408, %1047 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_413 is not referenced in the visible lines; %int1_414 is the alpha of the bias add.
    %int0_413 = torch.constant.int 0
    %int1_414 = torch.constant.int 1
    // Fused projection: [7,1600] x %64 [1600,4800] plus the [4800] operand %65; 4800 = 3 * 1600.
    %1049 = torch.aten.mm %1048, %64 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1050 = torch.aten.add.Tensor %1049, %65, %int1_414 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // %1051 (shape literal [1, 7, 4800]) is not referenced in the visible lines; the reshape uses %1052.
    %1051 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_415 = torch.constant.int 1
    %int7_416 = torch.constant.int 7
    %int4800_417 = torch.constant.int 4800
    %1052 = torch.prim.ListConstruct %int1_415, %int7_416, %int4800_417 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1053 = torch.aten.reshape %1050, %1052 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat literal (three elements, each 1600) converted to a list;
    // the split runs along dim 2 and yields three [1,7,1600] tensors (%1057#0, #1, #2).
    %1054 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1055 = torch.prim.tolist(%1054) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_418 = torch.constant.int 2
    %1056 = torch.aten.split_with_sizes %1053, %1055, %int2_418 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1057:3 = torch.prim.ListUnpack %1056 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0: view 1600 as 25 x 64, then swap dims 1 and 2 -> %1061 [1,25,7,64].
    // %1061 is the left operand of the score matmul below (query role).
    // %1058 (shape literal) is not referenced in the visible lines; the reshape uses %1059.
    %1058 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_419 = torch.constant.int 1
    %int7_420 = torch.constant.int 7
    %int25_421 = torch.constant.int 25
    %int64_422 = torch.constant.int 64
    %1059 = torch.prim.ListConstruct %int1_419, %int7_420, %int25_421, %int64_422 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1060 = torch.aten.reshape %1057#0, %1059 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_423 = torch.constant.int 1
    %int2_424 = torch.constant.int 2
    %1061 = torch.aten.transpose.int %1060, %int1_423, %int2_424 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1: same reshape + transpose -> %1065 [1,25,7,64] (key role).
    // NOTE(review): %1065 is not consumed in the visible lines; presumably it is one of the
    // function's [1,25,7,64] results — confirm against the return op.
    // %1062 (shape literal) is not referenced in the visible lines; the reshape uses %1063.
    %1062 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_425 = torch.constant.int 1
    %int7_426 = torch.constant.int 7
    %int25_427 = torch.constant.int 25
    %int64_428 = torch.constant.int 64
    %1063 = torch.prim.ListConstruct %int1_425, %int7_426, %int25_427, %int64_428 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1064 = torch.aten.reshape %1057#1, %1063 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_429 = torch.constant.int 1
    %int2_430 = torch.constant.int 2
    %1065 = torch.aten.transpose.int %1064, %int1_429, %int2_430 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2: same reshape + transpose -> %1069 [1,25,7,64]; right operand of the
    // second matmul below (value role).
    // %1066 (shape literal) is not referenced in the visible lines; the reshape uses %1067.
    %1066 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_431 = torch.constant.int 1
    %int7_432 = torch.constant.int 7
    %int25_433 = torch.constant.int 25
    %int64_434 = torch.constant.int 64
    %1067 = torch.prim.ListConstruct %int1_431, %int7_432, %int25_433, %int64_434 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1068 = torch.aten.reshape %1057#2, %1067 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_435 = torch.constant.int 1
    %int2_436 = torch.constant.int 2
    %1069 = torch.aten.transpose.int %1068, %int1_435, %int2_436 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // %1070 repeats the dim-1/dim-2 transpose of %1064 (same input and dims as %1065);
    // %1071 then swaps dims 2 and 3 to get [1,25,64,7] for the score matmul.
    %int1_437 = torch.constant.int 1
    %int2_438 = torch.constant.int 2
    %1070 = torch.aten.transpose.int %1064, %int1_437, %int2_438 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_439 = torch.constant.int 2
    %int3_440 = torch.constant.int 3
    %1071 = torch.aten.transpose.int %1070, %int2_439, %int3_440 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %1072 = torch.aten.matmul %1061, %1071 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: %1075 = 64 ^ 0.5, computed with tensor ops on 0-d literals.
    %1073 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1074 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1075 = torch.aten.pow.Tensor_Tensor %1073, %1074 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %1076 (empty shape literal) and %int0_441 are not referenced in the visible lines.
    %1076 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_441 = torch.constant.int 0
    // %1078 is a 0-d tensor filled with 0.0 (empty size list); %1079 = 0.0 + %1075, so the
    // divisor below has the same value as %1075.
    %1077 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_442 = torch.constant.none
    %float0.000000e00_443 = torch.constant.float 0.000000e+00
    %1078 = torch.aten.full %1077, %float0.000000e00_443, %none_442, %none_442, %none_442, %none_442 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_444 = torch.constant.int 1
    %1079 = torch.aten.add.Tensor %1078, %1075, %int1_444 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores: %1072 / 64^0.5.
    %1080 = torch.aten.div.Tensor %1072, %1079 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its values are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — cannot be confirmed from here.
    %1081 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the element
    // type is unchanged by this op.
    %int6_445 = torch.constant.int 6
    %none_446 = torch.constant.none
    %false_447 = torch.constant.bool false
    %1082 = torch.aten.to.dtype %1080, %int6_445, %false_447, %false_447, %none_446 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true, otherwise substitute -3.40282347E+38
    // (the most negative finite f32), so those positions vanish after softmax.
    %1083 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1084 = torch.aten.where.self %1081, %1082, %1083 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis of the [1,25,7,7] scores); dtype argument is none.
    %int3_448 = torch.constant.int 3
    %none_449 = torch.constant.none
    %1085 = torch.aten.softmax.int %1084, %int3_448, %none_449 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype (code 6); again f32 -> f32 per the op's types.
    %int6_450 = torch.constant.int 6
    %none_451 = torch.constant.none
    %false_452 = torch.constant.bool false
    %1086 = torch.aten.to.dtype %1085, %int6_450, %false_452, %false_452, %none_451 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x %1069 [1,25,7,64] -> [1,25,7,64].
    %1087 = torch.aten.matmul %1086, %1069 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the head layout: swap dims 1 and 2 back, then merge 25 x 64 into 1600.
    %int1_453 = torch.constant.int 1
    %int2_454 = torch.constant.int 2
    %1088 = torch.aten.transpose.int %1087, %int1_453, %int2_454 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %1089 (shape literal) is not referenced in the visible lines; the reshape uses %1090.
    %1089 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_455 = torch.constant.int 1
    %int7_456 = torch.constant.int 7
    %int1600_457 = torch.constant.int 1600
    %1090 = torch.prim.ListConstruct %int1_455, %int7_456, %int1600_457 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1091 = torch.aten.reshape %1088, %1090 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %1092 (shape literal) is not referenced in the visible lines; the reshape uses %1093.
    %1092 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_458 = torch.constant.int 7
    %int1600_459 = torch.constant.int 1600
    %1093 = torch.prim.ListConstruct %int7_458, %int1600_459 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the output projection.
    %1094 = torch.aten.reshape %1091, %1093 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_460 is not referenced in the visible lines; %int1_461 is the alpha of the bias add.
    %int0_460 = torch.constant.int 0
    %int1_461 = torch.constant.int 1
    // Output projection: [7,1600] x %66 [1600,1600] plus the [1600] operand %67 (alpha = 1).
    %1095 = torch.aten.mm %1094, %66 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1096 = torch.aten.add.Tensor %1095, %67, %int1_461 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %1097 (shape literal) is not referenced in the visible lines; the reshape uses %1098.
    %1097 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_462 = torch.constant.int 1
    %int7_463 = torch.constant.int 7
    %int1600_464 = torch.constant.int 1600
    %1098 = torch.prim.ListConstruct %int1_462, %int7_463, %int1600_464 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1099 = torch.aten.reshape %1096, %1098 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add the projected result onto the pre-norm input %1044.
    %int1_465 = torch.constant.int 1
    %1100 = torch.aten.add.Tensor %1099, %1044, %int1_465 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Feed-forward sub-block: input %1100, output %1131 ----
    // Layer norm of %1100 over the trailing dimension (normalized shape [1600]) with
    // weight %68, bias %69 (both defined outside this chunk) and eps 9.9999997473787516E-6.
    // Only %result0_468 is consumed in the visible lines.
    %float9.999990e-06_466 = torch.constant.float 9.9999997473787516E-6
    %int1600_467 = torch.constant.int 1600
    %1101 = torch.prim.ListConstruct %int1600_467 : (!torch.int) -> !torch.list<int>
    %result0_468, %result1_469, %result2_470 = torch.aten.native_layer_norm %1100, %1101, %68, %69, %float9.999990e-06_466 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %1102 (shape literal [-1, 1600]) is not referenced in the visible lines; the reshape uses %1103.
    %1102 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_471 = torch.constant.int 7
    %int1600_472 = torch.constant.int 1600
    %1103 = torch.prim.ListConstruct %int7_471, %int1600_472 : (!torch.int, !torch.int) -> !torch.list<int>
    // Drop the leading unit dimension: [1,7,1600] -> [7,1600] so a 2-D mm can be used.
    %1104 = torch.aten.reshape %result0_468, %1103 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_473 is not referenced in the visible lines; %int1_474 is the alpha of the bias add.
    %int0_473 = torch.constant.int 0
    %int1_474 = torch.constant.int 1
    // Expansion: [7,1600] x %70 [1600,6400], then add the [6400] operand %71 (alpha = 1).
    %1105 = torch.aten.mm %1104, %70 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1106 = torch.aten.add.Tensor %1105, %71, %int1_474 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %1107 (shape literal [1, 7, 6400]) is not referenced in the visible lines; the reshape uses %1108.
    %1107 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_475 = torch.constant.int 1
    %int7_476 = torch.constant.int 7
    %int6400_477 = torch.constant.int 6400
    %1108 = torch.prim.ListConstruct %int1_475, %int7_476, %int6400_477 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading unit dimension: x = %1109 with shape [1,7,6400].
    %1109 = torch.aten.reshape %1106, %1108 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation spelled out op by op; the sequence below evaluates
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh form of GELU (0.797884583 ~= sqrt(2/pi)).
    // %1111 = 0.5 * x
    %1110 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1111 = torch.aten.mul.Tensor %1109, %1110 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1113 = x^3 (pow with a 0-d tensor exponent of 3.0)
    %1112 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1113 = torch.aten.pow.Tensor_Tensor %1109, %1112 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1115 = 0.044715 * x^3
    %1114 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1115 = torch.aten.mul.Tensor %1113, %1114 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1116 = x + 0.044715 * x^3
    %int1_478 = torch.constant.int 1
    %1116 = torch.aten.add.Tensor %1109, %1115, %int1_478 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %1119 = tanh(0.797884583 * %1116)
    %1117 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1118 = torch.aten.mul.Tensor %1116, %1117 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1119 = torch.aten.tanh %1118 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1121 = 1 + tanh(...); %1122 = (0.5 * x) * (1 + tanh(...))
    %1120 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_479 = torch.constant.int 1
    %1121 = torch.aten.add.Tensor %1119, %1120, %int1_479 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1122 = torch.aten.mul.Tensor %1111, %1121 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %1123 (shape literal [-1, 6400]) is not referenced in the visible lines; the reshape uses %1124.
    %1123 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_480 = torch.constant.int 7
    %int6400_481 = torch.constant.int 6400
    %1124 = torch.prim.ListConstruct %int7_480, %int6400_481 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,6400] for the second 2-D mm.
    %1125 = torch.aten.reshape %1122, %1124 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_482 is not referenced in the visible lines; %int1_483 is the alpha of the bias add.
    %int0_482 = torch.constant.int 0
    %int1_483 = torch.constant.int 1
    // Contraction: [7,6400] x %72 [6400,1600], then add the [1600] operand %73 (alpha = 1).
    %1126 = torch.aten.mm %1125, %72 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1127 = torch.aten.add.Tensor %1126, %73, %int1_483 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %1128 (shape literal [1, 7, 1600]) is not referenced in the visible lines; the reshape uses %1129.
    %1128 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_484 = torch.constant.int 1
    %int7_485 = torch.constant.int 7
    %int1600_486 = torch.constant.int 1600
    %1129 = torch.prim.ListConstruct %int1_484, %int7_485, %int1600_486 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1130 = torch.aten.reshape %1127, %1129 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: add the sub-block result back onto its pre-norm input %1100.
    %int1_487 = torch.constant.int 1
    %1131 = torch.aten.add.Tensor %1100, %1130, %int1_487 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_488 = torch.constant.float 9.9999997473787516E-6
    %int1600_489 = torch.constant.int 1600
    %1132 = torch.prim.ListConstruct %int1600_489 : (!torch.int) -> !torch.list<int>
    %result0_490, %result1_491, %result2_492 = torch.aten.native_layer_norm %1131, %1132, %74, %75, %float9.999990e-06_488 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %1133 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_493 = torch.constant.int 7
    %int1600_494 = torch.constant.int 1600
    %1134 = torch.prim.ListConstruct %int7_493, %int1600_494 : (!torch.int, !torch.int) -> !torch.list<int>
    %1135 = torch.aten.reshape %result0_490, %1134 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_495 = torch.constant.int 0
    %int1_496 = torch.constant.int 1
    %1136 = torch.aten.mm %1135, %76 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1137 = torch.aten.add.Tensor %1136, %77, %int1_496 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1138 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_497 = torch.constant.int 1
    %int7_498 = torch.constant.int 7
    %int4800_499 = torch.constant.int 4800
    %1139 = torch.prim.ListConstruct %int1_497, %int7_498, %int4800_499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1140 = torch.aten.reshape %1137, %1139 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %1141 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1142 = torch.prim.tolist(%1141) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_500 = torch.constant.int 2
    %1143 = torch.aten.split_with_sizes %1140, %1142, %int2_500 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1144:3 = torch.prim.ListUnpack %1143 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %1145 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_501 = torch.constant.int 1
    %int7_502 = torch.constant.int 7
    %int25_503 = torch.constant.int 25
    %int64_504 = torch.constant.int 64
    %1146 = torch.prim.ListConstruct %int1_501, %int7_502, %int25_503, %int64_504 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1147 = torch.aten.reshape %1144#0, %1146 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_505 = torch.constant.int 1
    %int2_506 = torch.constant.int 2
    %1148 = torch.aten.transpose.int %1147, %int1_505, %int2_506 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %1149 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_507 = torch.constant.int 1
    %int7_508 = torch.constant.int 7
    %int25_509 = torch.constant.int 25
    %int64_510 = torch.constant.int 64
    %1150 = torch.prim.ListConstruct %int1_507, %int7_508, %int25_509, %int64_510 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1151 = torch.aten.reshape %1144#1, %1150 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_511 = torch.constant.int 1
    %int2_512 = torch.constant.int 2
    %1152 = torch.aten.transpose.int %1151, %int1_511, %int2_512 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %1153 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_513 = torch.constant.int 1
    %int7_514 = torch.constant.int 7
    %int25_515 = torch.constant.int 25
    %int64_516 = torch.constant.int 64
    %1154 = torch.prim.ListConstruct %int1_513, %int7_514, %int25_515, %int64_516 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1155 = torch.aten.reshape %1144#2, %1154 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_517 = torch.constant.int 1
    %int2_518 = torch.constant.int 2
    %1156 = torch.aten.transpose.int %1155, %int1_517, %int2_518 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_519 = torch.constant.int 1
    %int2_520 = torch.constant.int 2
    %1157 = torch.aten.transpose.int %1151, %int1_519, %int2_520 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_521 = torch.constant.int 2
    %int3_522 = torch.constant.int 3
    %1158 = torch.aten.transpose.int %1157, %int2_521, %int3_522 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %1159 = torch.aten.matmul %1148, %1158 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %1160 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1161 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1162 = torch.aten.pow.Tensor_Tensor %1160, %1161 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %1163 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_523 = torch.constant.int 0
    %1164 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_524 = torch.constant.none
    %float0.000000e00_525 = torch.constant.float 0.000000e+00
    %1165 = torch.aten.full %1164, %float0.000000e00_525, %none_524, %none_524, %none_524, %none_524 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_526 = torch.constant.int 1
    %1166 = torch.aten.add.Tensor %1165, %1162, %int1_526 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %1167 = torch.aten.div.Tensor %1159, %1166 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %1168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_527 = torch.constant.int 6
    %none_528 = torch.constant.none
    %false_529 = torch.constant.bool false
    %1169 = torch.aten.to.dtype %1167, %int6_527, %false_529, %false_529, %none_528 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %1170 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1171 = torch.aten.where.self %1168, %1169, %1170 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_530 = torch.constant.int 3
    %none_531 = torch.constant.none
    %1172 = torch.aten.softmax.int %1171, %int3_530, %none_531 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_532 = torch.constant.int 6
    %none_533 = torch.constant.none
    %false_534 = torch.constant.bool false
    %1173 = torch.aten.to.dtype %1172, %int6_532, %false_534, %false_534, %none_533 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %1174 = torch.aten.matmul %1173, %1156 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_535 = torch.constant.int 1
    %int2_536 = torch.constant.int 2
    %1175 = torch.aten.transpose.int %1174, %int1_535, %int2_536 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1176 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_537 = torch.constant.int 1
    %int7_538 = torch.constant.int 7
    %int1600_539 = torch.constant.int 1600
    %1177 = torch.prim.ListConstruct %int1_537, %int7_538, %int1600_539 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1178 = torch.aten.reshape %1175, %1177 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %1179 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_540 = torch.constant.int 7
    %int1600_541 = torch.constant.int 1600
    %1180 = torch.prim.ListConstruct %int7_540, %int1600_541 : (!torch.int, !torch.int) -> !torch.list<int>
    %1181 = torch.aten.reshape %1178, %1180 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_542 = torch.constant.int 0
    %int1_543 = torch.constant.int 1
    %1182 = torch.aten.mm %1181, %78 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1183 = torch.aten.add.Tensor %1182, %79, %int1_543 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1184 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_544 = torch.constant.int 1
    %int7_545 = torch.constant.int 7
    %int1600_546 = torch.constant.int 1600
    %1185 = torch.prim.ListConstruct %int1_544, %int7_545, %int1600_546 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1186 = torch.aten.reshape %1183, %1185 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_547 = torch.constant.int 1
    %1187 = torch.aten.add.Tensor %1186, %1131, %int1_547 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_548 = torch.constant.float 9.9999997473787516E-6
    %int1600_549 = torch.constant.int 1600
    %1188 = torch.prim.ListConstruct %int1600_549 : (!torch.int) -> !torch.list<int>
    %result0_550, %result1_551, %result2_552 = torch.aten.native_layer_norm %1187, %1188, %80, %81, %float9.999990e-06_548 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %1189 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_553 = torch.constant.int 7
    %int1600_554 = torch.constant.int 1600
    %1190 = torch.prim.ListConstruct %int7_553, %int1600_554 : (!torch.int, !torch.int) -> !torch.list<int>
    %1191 = torch.aten.reshape %result0_550, %1190 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_555 = torch.constant.int 0
    %int1_556 = torch.constant.int 1
    %1192 = torch.aten.mm %1191, %82 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1193 = torch.aten.add.Tensor %1192, %83, %int1_556 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1194 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_557 = torch.constant.int 1
    %int7_558 = torch.constant.int 7
    %int6400_559 = torch.constant.int 6400
    %1195 = torch.prim.ListConstruct %int1_557, %int7_558, %int6400_559 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1196 = torch.aten.reshape %1193, %1195 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %1197 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1198 = torch.aten.mul.Tensor %1196, %1197 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1199 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1200 = torch.aten.pow.Tensor_Tensor %1196, %1199 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1201 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1202 = torch.aten.mul.Tensor %1200, %1201 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_560 = torch.constant.int 1
    %1203 = torch.aten.add.Tensor %1196, %1202, %int1_560 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1204 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1205 = torch.aten.mul.Tensor %1203, %1204 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1206 = torch.aten.tanh %1205 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1207 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_561 = torch.constant.int 1
    %1208 = torch.aten.add.Tensor %1206, %1207, %int1_561 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1209 = torch.aten.mul.Tensor %1198, %1208 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1210 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_562 = torch.constant.int 7
    %int6400_563 = torch.constant.int 6400
    %1211 = torch.prim.ListConstruct %int7_562, %int6400_563 : (!torch.int, !torch.int) -> !torch.list<int>
    %1212 = torch.aten.reshape %1209, %1211 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_564 = torch.constant.int 0
    %int1_565 = torch.constant.int 1
    %1213 = torch.aten.mm %1212, %84 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1214 = torch.aten.add.Tensor %1213, %85, %int1_565 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1215 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_566 = torch.constant.int 1
    %int7_567 = torch.constant.int 7
    %int1600_568 = torch.constant.int 1600
    %1216 = torch.prim.ListConstruct %int1_566, %int7_567, %int1600_568 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1217 = torch.aten.reshape %1214, %1216 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_569 = torch.constant.int 1
    %1218 = torch.aten.add.Tensor %1187, %1217, %int1_569 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_570 = torch.constant.float 9.9999997473787516E-6
    %int1600_571 = torch.constant.int 1600
    %1219 = torch.prim.ListConstruct %int1600_571 : (!torch.int) -> !torch.list<int>
    %result0_572, %result1_573, %result2_574 = torch.aten.native_layer_norm %1218, %1219, %86, %87, %float9.999990e-06_570 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %1220 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_575 = torch.constant.int 7
    %int1600_576 = torch.constant.int 1600
    %1221 = torch.prim.ListConstruct %int7_575, %int1600_576 : (!torch.int, !torch.int) -> !torch.list<int>
    %1222 = torch.aten.reshape %result0_572, %1221 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_577 = torch.constant.int 0
    %int1_578 = torch.constant.int 1
    %1223 = torch.aten.mm %1222, %88 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1224 = torch.aten.add.Tensor %1223, %89, %int1_578 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1225 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_579 = torch.constant.int 1
    %int7_580 = torch.constant.int 7
    %int4800_581 = torch.constant.int 4800
    %1226 = torch.prim.ListConstruct %int1_579, %int7_580, %int4800_581 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1227 = torch.aten.reshape %1224, %1226 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %1228 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1229 = torch.prim.tolist(%1228) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_582 = torch.constant.int 2
    %1230 = torch.aten.split_with_sizes %1227, %1229, %int2_582 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1231:3 = torch.prim.ListUnpack %1230 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %1232 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_583 = torch.constant.int 1
    %int7_584 = torch.constant.int 7
    %int25_585 = torch.constant.int 25
    %int64_586 = torch.constant.int 64
    %1233 = torch.prim.ListConstruct %int1_583, %int7_584, %int25_585, %int64_586 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1234 = torch.aten.reshape %1231#0, %1233 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_587 = torch.constant.int 1
    %int2_588 = torch.constant.int 2
    %1235 = torch.aten.transpose.int %1234, %int1_587, %int2_588 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %1236 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_589 = torch.constant.int 1
    %int7_590 = torch.constant.int 7
    %int25_591 = torch.constant.int 25
    %int64_592 = torch.constant.int 64
    %1237 = torch.prim.ListConstruct %int1_589, %int7_590, %int25_591, %int64_592 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1238 = torch.aten.reshape %1231#1, %1237 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_593 = torch.constant.int 1
    %int2_594 = torch.constant.int 2
    %1239 = torch.aten.transpose.int %1238, %int1_593, %int2_594 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %1240 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_595 = torch.constant.int 1
    %int7_596 = torch.constant.int 7
    %int25_597 = torch.constant.int 25
    %int64_598 = torch.constant.int 64
    %1241 = torch.prim.ListConstruct %int1_595, %int7_596, %int25_597, %int64_598 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1242 = torch.aten.reshape %1231#2, %1241 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_599 = torch.constant.int 1
    %int2_600 = torch.constant.int 2
    %1243 = torch.aten.transpose.int %1242, %int1_599, %int2_600 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_601 = torch.constant.int 1
    %int2_602 = torch.constant.int 2
    %1244 = torch.aten.transpose.int %1238, %int1_601, %int2_602 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_603 = torch.constant.int 2
    %int3_604 = torch.constant.int 3
    %1245 = torch.aten.transpose.int %1244, %int2_603, %int3_604 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %1246 = torch.aten.matmul %1235, %1245 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %1247 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1248 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1249 = torch.aten.pow.Tensor_Tensor %1247, %1248 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %1250 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_605 = torch.constant.int 0
    %1251 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_606 = torch.constant.none
    %float0.000000e00_607 = torch.constant.float 0.000000e+00
    %1252 = torch.aten.full %1251, %float0.000000e00_607, %none_606, %none_606, %none_606, %none_606 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_608 = torch.constant.int 1
    %1253 = torch.aten.add.Tensor %1252, %1249, %int1_608 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %1254 = torch.aten.div.Tensor %1246, %1253 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %1255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_609 = torch.constant.int 6
    %none_610 = torch.constant.none
    %false_611 = torch.constant.bool false
    %1256 = torch.aten.to.dtype %1254, %int6_609, %false_611, %false_611, %none_610 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %1257 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1258 = torch.aten.where.self %1255, %1256, %1257 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_612 = torch.constant.int 3
    %none_613 = torch.constant.none
    %1259 = torch.aten.softmax.int %1258, %int3_612, %none_613 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_614 = torch.constant.int 6
    %none_615 = torch.constant.none
    %false_616 = torch.constant.bool false
    %1260 = torch.aten.to.dtype %1259, %int6_614, %false_616, %false_616, %none_615 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %1261 = torch.aten.matmul %1260, %1243 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_617 = torch.constant.int 1
    %int2_618 = torch.constant.int 2
    %1262 = torch.aten.transpose.int %1261, %int1_617, %int2_618 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1263 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_619 = torch.constant.int 1
    %int7_620 = torch.constant.int 7
    %int1600_621 = torch.constant.int 1600
    %1264 = torch.prim.ListConstruct %int1_619, %int7_620, %int1600_621 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1265 = torch.aten.reshape %1262, %1264 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %1266 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_622 = torch.constant.int 7
    %int1600_623 = torch.constant.int 1600
    %1267 = torch.prim.ListConstruct %int7_622, %int1600_623 : (!torch.int, !torch.int) -> !torch.list<int>
    %1268 = torch.aten.reshape %1265, %1267 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_624 = torch.constant.int 0
    %int1_625 = torch.constant.int 1
    %1269 = torch.aten.mm %1268, %90 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1270 = torch.aten.add.Tensor %1269, %91, %int1_625 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1271 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_626 = torch.constant.int 1
    %int7_627 = torch.constant.int 7
    %int1600_628 = torch.constant.int 1600
    %1272 = torch.prim.ListConstruct %int1_626, %int7_627, %int1600_628 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1273 = torch.aten.reshape %1270, %1272 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_629 = torch.constant.int 1
    %1274 = torch.aten.add.Tensor %1273, %1218, %int1_629 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_630 = torch.constant.float 9.9999997473787516E-6
    %int1600_631 = torch.constant.int 1600
    %1275 = torch.prim.ListConstruct %int1600_631 : (!torch.int) -> !torch.list<int>
    %result0_632, %result1_633, %result2_634 = torch.aten.native_layer_norm %1274, %1275, %92, %93, %float9.999990e-06_630 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %1276 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_635 = torch.constant.int 7
    %int1600_636 = torch.constant.int 1600
    %1277 = torch.prim.ListConstruct %int7_635, %int1600_636 : (!torch.int, !torch.int) -> !torch.list<int>
    %1278 = torch.aten.reshape %result0_632, %1277 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_637 = torch.constant.int 0
    %int1_638 = torch.constant.int 1
    %1279 = torch.aten.mm %1278, %94 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1280 = torch.aten.add.Tensor %1279, %95, %int1_638 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1281 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_639 = torch.constant.int 1
    %int7_640 = torch.constant.int 7
    %int6400_641 = torch.constant.int 6400
    %1282 = torch.prim.ListConstruct %int1_639, %int7_640, %int6400_641 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1283 = torch.aten.reshape %1280, %1282 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %1284 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1285 = torch.aten.mul.Tensor %1283, %1284 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1286 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1287 = torch.aten.pow.Tensor_Tensor %1283, %1286 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1288 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1289 = torch.aten.mul.Tensor %1287, %1288 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_642 = torch.constant.int 1
    %1290 = torch.aten.add.Tensor %1283, %1289, %int1_642 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1291 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1292 = torch.aten.mul.Tensor %1290, %1291 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1293 = torch.aten.tanh %1292 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1294 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_643 = torch.constant.int 1
    %1295 = torch.aten.add.Tensor %1293, %1294, %int1_643 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1296 = torch.aten.mul.Tensor %1285, %1295 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1297 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_644 = torch.constant.int 7
    %int6400_645 = torch.constant.int 6400
    %1298 = torch.prim.ListConstruct %int7_644, %int6400_645 : (!torch.int, !torch.int) -> !torch.list<int>
    %1299 = torch.aten.reshape %1296, %1298 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_646 = torch.constant.int 0
    %int1_647 = torch.constant.int 1
    %1300 = torch.aten.mm %1299, %96 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1301 = torch.aten.add.Tensor %1300, %97, %int1_647 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1302 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_648 = torch.constant.int 1
    %int7_649 = torch.constant.int 7
    %int1600_650 = torch.constant.int 1600
    %1303 = torch.prim.ListConstruct %int1_648, %int7_649, %int1600_650 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1304 = torch.aten.reshape %1301, %1303 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_651 = torch.constant.int 1
    %1305 = torch.aten.add.Tensor %1274, %1304, %int1_651 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_652 = torch.constant.float 9.9999997473787516E-6
    %int1600_653 = torch.constant.int 1600
    %1306 = torch.prim.ListConstruct %int1600_653 : (!torch.int) -> !torch.list<int>
    %result0_654, %result1_655, %result2_656 = torch.aten.native_layer_norm %1305, %1306, %98, %99, %float9.999990e-06_652 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Per-token linear projection 1600 -> 4800:
    //   flatten [1,7,1600] to [7,1600], multiply by %100 ([1600,4800]),
    //   add the broadcast bias %101 ([4800]), then reshape back to [1,7,4800] (%1314).
    // NOTE(review): the shape literals %1307 and %1312 and the constant %int0_659 are not
    // referenced by any op in the visible lines; the reshapes take their target shapes from
    // the ListConstruct values. They look like importer leftovers — confirm before removing.
    %1307 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_657 = torch.constant.int 7
    %int1600_658 = torch.constant.int 1600
    %1308 = torch.prim.ListConstruct %int7_657, %int1600_658 : (!torch.int, !torch.int) -> !torch.list<int>
    %1309 = torch.aten.reshape %result0_654, %1308 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_659 = torch.constant.int 0
    %int1_660 = torch.constant.int 1
    %1310 = torch.aten.mm %1309, %100 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    // Bias add with alpha = 1.
    %1311 = torch.aten.add.Tensor %1310, %101, %int1_660 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1312 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_661 = torch.constant.int 1
    %int7_662 = torch.constant.int 7
    %int4800_663 = torch.constant.int 4800
    %1313 = torch.prim.ListConstruct %int1_661, %int7_662, %int4800_663 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1314 = torch.aten.reshape %1311, %1313 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split %1314 ([1,7,4800]) along dimension 2 into three [1,7,1600] slices.
    // %1315 is a splat constant, so the size list is [1600, 1600, 1600].
    // NOTE(review): given the gpt2-xl file name, #0/#1/#2 are presumably the attention
    // query/key/value projections — confirm against the exporting model.
    %1315 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1316 = torch.prim.tolist(%1315) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_664 = torch.constant.int 2
    %1317 = torch.aten.split_with_sizes %1314, %1316, %int2_664 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1318:3 = torch.prim.ListUnpack %1317 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three split results is reshaped from [1,7,1600] to [1,7,25,64] and then
    // has dimensions 1 and 2 swapped, giving [1,25,7,64] (%1322, %1326, %1330).
    // NOTE(review): 25 x 64 presumably means 25 heads of width 64 — confirm.
    // NOTE(review): the shape literals %1319, %1323 and %1327 are not referenced in the
    // visible lines; the reshapes use the ListConstruct values instead.

    // Slice #0 -> %1322.
    %1319 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_665 = torch.constant.int 1
    %int7_666 = torch.constant.int 7
    %int25_667 = torch.constant.int 25
    %int64_668 = torch.constant.int 64
    %1320 = torch.prim.ListConstruct %int1_665, %int7_666, %int25_667, %int64_668 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1321 = torch.aten.reshape %1318#0, %1320 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_669 = torch.constant.int 1
    %int2_670 = torch.constant.int 2
    %1322 = torch.aten.transpose.int %1321, %int1_669, %int2_670 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %1326 (not consumed by any op in the visible lines).
    %1323 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_671 = torch.constant.int 1
    %int7_672 = torch.constant.int 7
    %int25_673 = torch.constant.int 25
    %int64_674 = torch.constant.int 64
    %1324 = torch.prim.ListConstruct %int1_671, %int7_672, %int25_673, %int64_674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1325 = torch.aten.reshape %1318#1, %1324 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_675 = torch.constant.int 1
    %int2_676 = torch.constant.int 2
    %1326 = torch.aten.transpose.int %1325, %int1_675, %int2_676 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %1330.
    %1327 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_677 = torch.constant.int 1
    %int7_678 = torch.constant.int 7
    %int25_679 = torch.constant.int 25
    %int64_680 = torch.constant.int 64
    %1328 = torch.prim.ListConstruct %int1_677, %int7_678, %int25_679, %int64_680 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1329 = torch.aten.reshape %1318#2, %1328 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_681 = torch.constant.int 1
    %int2_682 = torch.constant.int 2
    %1330 = torch.aten.transpose.int %1329, %int1_681, %int2_682 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Pairwise score matrix: %1333 = %1322 x transpose(%1331) with result [1,25,7,7].
    // %1331 swaps dims 1 and 2 of %1325 — the same operand and dimensions as the earlier
    // %1326 — so it recomputes an identical value; %1332 then swaps dims 2 and 3 to get
    // [1,25,64,7] so the batched matmul contracts over the size-64 dimension.
    %int1_683 = torch.constant.int 1
    %int2_684 = torch.constant.int 2
    %1331 = torch.aten.transpose.int %1325, %int1_683, %int2_684 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_685 = torch.constant.int 2
    %int3_686 = torch.constant.int 3
    %1332 = torch.aten.transpose.int %1331, %int2_685, %int3_686 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %1333 = torch.aten.matmul %1322, %1332 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale the scores: %1341 = %1333 / (0.0 + 64.0 ^ 0.5).
    //   %1336 = pow(64.0, 0.5) on rank-0 tensors,
    //   %1339 = rank-0 f32 tensor filled with 0.0 (empty size list),
    //   %1340 = %1339 + 1 * %1336, which is then the divisor broadcast over [1,25,7,7].
    // NOTE(review): 64 matches the trailing dimension of the [1,25,7,64] operands, so this
    // is presumably the usual 1/sqrt(head_dim) attention scaling — confirm.
    // NOTE(review): %1337 and %int0_687 are not referenced in the visible lines.
    %1334 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1335 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1336 = torch.aten.pow.Tensor_Tensor %1334, %1335 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %1337 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_687 = torch.constant.int 0
    %1338 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_688 = torch.constant.none
    %float0.000000e00_689 = torch.constant.float 0.000000e+00
    %1339 = torch.aten.full %1338, %float0.000000e00_689, %none_688, %none_688, %none_688, %none_688 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_690 = torch.constant.int 1
    %1340 = torch.aten.add.Tensor %1339, %1336, %int1_690 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %1341 = torch.aten.div.Tensor %1333, %1340 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores and normalize them with a softmax over dimension 3.
    // %1342 is a [1,1,7,7] boolean constant whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask — cannot be confirmed here.
    %1342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // aten.to.dtype with dtype code 6 and both boolean flags false; operand and result
    // types are both f32, so the element type is unchanged by this op.
    %int6_691 = torch.constant.int 6
    %none_692 = torch.constant.none
    %false_693 = torch.constant.bool false
    %1343 = torch.aten.to.dtype %1341, %int6_691, %false_693, %false_693, %none_692 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise substitute -3.40282347E+38
    // (the most negative finite f32); the mask broadcasts over dimension 1.
    %1344 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1345 = torch.aten.where.self %1342, %1343, %1344 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over the last dimension (dim = 3) with no dtype override.
    %int3_694 = torch.constant.int 3
    %none_695 = torch.constant.none
    %1346 = torch.aten.softmax.int %1345, %int3_694, %none_695 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; again the element type is unchanged.
    %int6_696 = torch.constant.int 6
    %none_697 = torch.constant.none
    %false_698 = torch.constant.bool false
    %1347 = torch.aten.to.dtype %1346, %int6_696, %false_698, %false_698, %none_697 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the normalized weights %1347 ([1,25,7,7]) to %1330 ([1,25,7,64]), then undo the
    // earlier layout change: swap dims 1 and 2 back to [1,7,25,64] and merge the last two
    // dimensions (25 * 64 = 1600) to obtain %1352 ([1,7,1600]).
    // NOTE(review): the shape literal %1350 is not referenced in the visible lines.
    %1348 = torch.aten.matmul %1347, %1330 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_699 = torch.constant.int 1
    %int2_700 = torch.constant.int 2
    %1349 = torch.aten.transpose.int %1348, %int1_699, %int2_700 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1350 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_701 = torch.constant.int 1
    %int7_702 = torch.constant.int 7
    %int1600_703 = torch.constant.int 1600
    %1351 = torch.prim.ListConstruct %int1_701, %int7_702, %int1600_703 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1352 = torch.aten.reshape %1349, %1351 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Per-token linear projection 1600 -> 1600 followed by a residual add:
    //   flatten %1352 to [7,1600], multiply by %102 ([1600,1600]), add bias %103 ([1600]),
    //   reshape to [1,7,1600] (%1360), then %1361 = %1360 + 1 * %1305.
    // NOTE(review): %1353, %1358 and %int0_706 are not referenced in the visible lines.
    %1353 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_704 = torch.constant.int 7
    %int1600_705 = torch.constant.int 1600
    %1354 = torch.prim.ListConstruct %int7_704, %int1600_705 : (!torch.int, !torch.int) -> !torch.list<int>
    %1355 = torch.aten.reshape %1352, %1354 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_706 = torch.constant.int 0
    %int1_707 = torch.constant.int 1
    %1356 = torch.aten.mm %1355, %102 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1357 = torch.aten.add.Tensor %1356, %103, %int1_707 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1358 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_708 = torch.constant.int 1
    %int7_709 = torch.constant.int 7
    %int1600_710 = torch.constant.int 1600
    %1359 = torch.prim.ListConstruct %int1_708, %int7_709, %int1600_710 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1360 = torch.aten.reshape %1357, %1359 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection back to %1305, the input of the preceding layer norm.
    %int1_711 = torch.constant.int 1
    %1361 = torch.aten.add.Tensor %1360, %1305, %int1_711 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %1361 over the trailing dimension (normalized shape [1600]),
    // with %104 / %105 in the weight / bias operand slots and epsilon = f32(1e-5).
    // Only %result0_714 is consumed in the visible part of the function.
    %float9.999990e-06_712 = torch.constant.float 9.9999997473787516E-6
    %int1600_713 = torch.constant.int 1600
    %1362 = torch.prim.ListConstruct %int1600_713 : (!torch.int) -> !torch.list<int>
    %result0_714, %result1_715, %result2_716 = torch.aten.native_layer_norm %1361, %1362, %104, %105, %float9.999990e-06_712 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Per-token linear projection 1600 -> 6400:
    //   flatten %result0_714 to [7,1600], multiply by %106 ([1600,6400]),
    //   add bias %107 ([6400]) with alpha = 1, reshape to [1,7,6400] (%1370).
    // NOTE(review): %1363, %1368 and %int0_719 are not referenced in the visible lines.
    %1363 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_717 = torch.constant.int 7
    %int1600_718 = torch.constant.int 1600
    %1364 = torch.prim.ListConstruct %int7_717, %int1600_718 : (!torch.int, !torch.int) -> !torch.list<int>
    %1365 = torch.aten.reshape %result0_714, %1364 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_719 = torch.constant.int 0
    %int1_720 = torch.constant.int 1
    %1366 = torch.aten.mm %1365, %106 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1367 = torch.aten.add.Tensor %1366, %107, %int1_720 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1368 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_721 = torch.constant.int 1
    %int7_722 = torch.constant.int 7
    %int6400_723 = torch.constant.int 6400
    %1369 = torch.prim.ListConstruct %int1_721, %int7_722, %int6400_723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1370 = torch.aten.reshape %1367, %1369 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %1370, expanded into primitive ops:
    //   %1383 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh approximation of GELU; 0.797884583 is sqrt(2/pi) rounded to f32.
    // 0.5 * x
    %1371 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1372 = torch.aten.mul.Tensor %1370, %1371 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %1373 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1374 = torch.aten.pow.Tensor_Tensor %1370, %1373 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %1375 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1376 = torch.aten.mul.Tensor %1374, %1375 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_724 = torch.constant.int 1
    %1377 = torch.aten.add.Tensor %1370, %1376, %int1_724 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %1378 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1379 = torch.aten.mul.Tensor %1377, %1378 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1380 = torch.aten.tanh %1379 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by 0.5 * x
    %1381 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_725 = torch.constant.int 1
    %1382 = torch.aten.add.Tensor %1380, %1381, %int1_725 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1383 = torch.aten.mul.Tensor %1372, %1382 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Per-token linear projection 6400 -> 1600 followed by a residual add:
    //   flatten %1383 to [7,6400], multiply by %108 ([6400,1600]), add bias %109 ([1600]),
    //   reshape to [1,7,1600] (%1391), then %1392 = %1361 + 1 * %1391.
    // NOTE(review): %1384, %1389 and %int0_728 are not referenced in the visible lines.
    %1384 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_726 = torch.constant.int 7
    %int6400_727 = torch.constant.int 6400
    %1385 = torch.prim.ListConstruct %int7_726, %int6400_727 : (!torch.int, !torch.int) -> !torch.list<int>
    %1386 = torch.aten.reshape %1383, %1385 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_728 = torch.constant.int 0
    %int1_729 = torch.constant.int 1
    %1387 = torch.aten.mm %1386, %108 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1388 = torch.aten.add.Tensor %1387, %109, %int1_729 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1389 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_730 = torch.constant.int 1
    %int7_731 = torch.constant.int 7
    %int1600_732 = torch.constant.int 1600
    %1390 = torch.prim.ListConstruct %int1_730, %int7_731, %int1600_732 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1391 = torch.aten.reshape %1388, %1390 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection back to %1361, the input of the preceding layer norm.
    %int1_733 = torch.constant.int 1
    %1392 = torch.aten.add.Tensor %1361, %1391, %int1_733 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %1392 over the trailing dimension (normalized shape [1600]),
    // with %110 / %111 in the weight / bias operand slots and epsilon = f32(1e-5).
    // Only %result0_736 is consumed in the visible part of the function.
    %float9.999990e-06_734 = torch.constant.float 9.9999997473787516E-6
    %int1600_735 = torch.constant.int 1600
    %1393 = torch.prim.ListConstruct %int1600_735 : (!torch.int) -> !torch.list<int>
    %result0_736, %result1_737, %result2_738 = torch.aten.native_layer_norm %1392, %1393, %110, %111, %float9.999990e-06_734 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Per-token linear projection 1600 -> 4800:
    //   flatten %result0_736 to [7,1600], multiply by %112 ([1600,4800]),
    //   add bias %113 ([4800]) with alpha = 1, reshape to [1,7,4800] (%1401).
    // NOTE(review): %1394, %1399 and %int0_741 are not referenced in the visible lines;
    // the reshapes take their target shapes from the ListConstruct values.
    %1394 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_739 = torch.constant.int 7
    %int1600_740 = torch.constant.int 1600
    %1395 = torch.prim.ListConstruct %int7_739, %int1600_740 : (!torch.int, !torch.int) -> !torch.list<int>
    %1396 = torch.aten.reshape %result0_736, %1395 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_741 = torch.constant.int 0
    %int1_742 = torch.constant.int 1
    %1397 = torch.aten.mm %1396, %112 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1398 = torch.aten.add.Tensor %1397, %113, %int1_742 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1399 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_743 = torch.constant.int 1
    %int7_744 = torch.constant.int 7
    %int4800_745 = torch.constant.int 4800
    %1400 = torch.prim.ListConstruct %int1_743, %int7_744, %int4800_745 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1401 = torch.aten.reshape %1398, %1400 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split %1401 ([1,7,4800]) along dimension 2 into three [1,7,1600] slices.
    // %1402 is a splat constant, so the size list is [1600, 1600, 1600].
    // NOTE(review): given the gpt2-xl file name, #0/#1/#2 are presumably the attention
    // query/key/value projections — confirm against the exporting model.
    %1402 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1403 = torch.prim.tolist(%1402) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_746 = torch.constant.int 2
    %1404 = torch.aten.split_with_sizes %1401, %1403, %int2_746 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1405:3 = torch.prim.ListUnpack %1404 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three split results is reshaped from [1,7,1600] to [1,7,25,64] and then
    // has dimensions 1 and 2 swapped, giving [1,25,7,64] (%1409, %1413, %1417).
    // NOTE(review): 25 x 64 presumably means 25 heads of width 64 — confirm.
    // NOTE(review): the shape literals %1406, %1410 and %1414 are not referenced in the
    // visible lines; the reshapes use the ListConstruct values instead.

    // Slice #0 -> %1409.
    %1406 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_747 = torch.constant.int 1
    %int7_748 = torch.constant.int 7
    %int25_749 = torch.constant.int 25
    %int64_750 = torch.constant.int 64
    %1407 = torch.prim.ListConstruct %int1_747, %int7_748, %int25_749, %int64_750 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1408 = torch.aten.reshape %1405#0, %1407 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_751 = torch.constant.int 1
    %int2_752 = torch.constant.int 2
    %1409 = torch.aten.transpose.int %1408, %int1_751, %int2_752 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %1413 (not consumed by any op in the visible lines).
    %1410 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_753 = torch.constant.int 1
    %int7_754 = torch.constant.int 7
    %int25_755 = torch.constant.int 25
    %int64_756 = torch.constant.int 64
    %1411 = torch.prim.ListConstruct %int1_753, %int7_754, %int25_755, %int64_756 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1412 = torch.aten.reshape %1405#1, %1411 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_757 = torch.constant.int 1
    %int2_758 = torch.constant.int 2
    %1413 = torch.aten.transpose.int %1412, %int1_757, %int2_758 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %1417.
    %1414 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_759 = torch.constant.int 1
    %int7_760 = torch.constant.int 7
    %int25_761 = torch.constant.int 25
    %int64_762 = torch.constant.int 64
    %1415 = torch.prim.ListConstruct %int1_759, %int7_760, %int25_761, %int64_762 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1416 = torch.aten.reshape %1405#2, %1415 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_763 = torch.constant.int 1
    %int2_764 = torch.constant.int 2
    %1417 = torch.aten.transpose.int %1416, %int1_763, %int2_764 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Pairwise score matrix: %1420 = %1409 x transpose(%1418) with result [1,25,7,7].
    // %1418 swaps dims 1 and 2 of %1412 — the same operand and dimensions as the earlier
    // %1413 — so it recomputes an identical value; %1419 then swaps dims 2 and 3 to get
    // [1,25,64,7] so the batched matmul contracts over the size-64 dimension.
    %int1_765 = torch.constant.int 1
    %int2_766 = torch.constant.int 2
    %1418 = torch.aten.transpose.int %1412, %int1_765, %int2_766 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_767 = torch.constant.int 2
    %int3_768 = torch.constant.int 3
    %1419 = torch.aten.transpose.int %1418, %int2_767, %int3_768 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %1420 = torch.aten.matmul %1409, %1419 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale the scores: %1428 = %1420 / (0.0 + 64.0 ^ 0.5).
    //   %1423 = pow(64.0, 0.5) on rank-0 tensors,
    //   %1426 = rank-0 f32 tensor filled with 0.0 (empty size list),
    //   %1427 = %1426 + 1 * %1423, which is then the divisor broadcast over [1,25,7,7].
    // NOTE(review): 64 matches the trailing dimension of the [1,25,7,64] operands, so this
    // is presumably the usual 1/sqrt(head_dim) attention scaling — confirm.
    // NOTE(review): %1424 and %int0_769 are not referenced in the visible lines.
    %1421 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1422 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1423 = torch.aten.pow.Tensor_Tensor %1421, %1422 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %1424 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_769 = torch.constant.int 0
    %1425 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_770 = torch.constant.none
    %float0.000000e00_771 = torch.constant.float 0.000000e+00
    %1426 = torch.aten.full %1425, %float0.000000e00_771, %none_770, %none_770, %none_770, %none_770 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_772 = torch.constant.int 1
    %1427 = torch.aten.add.Tensor %1426, %1423, %int1_772 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %1428 = torch.aten.div.Tensor %1420, %1427 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores and normalize them with a softmax over dimension 3.
    // %1429 is a [1,1,7,7] boolean constant whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask — cannot be confirmed here.
    %1429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // aten.to.dtype with dtype code 6 and both boolean flags false; operand and result
    // types are both f32, so the element type is unchanged by this op.
    %int6_773 = torch.constant.int 6
    %none_774 = torch.constant.none
    %false_775 = torch.constant.bool false
    %1430 = torch.aten.to.dtype %1428, %int6_773, %false_775, %false_775, %none_774 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise substitute -3.40282347E+38
    // (the most negative finite f32); the mask broadcasts over dimension 1.
    %1431 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1432 = torch.aten.where.self %1429, %1430, %1431 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over the last dimension (dim = 3) with no dtype override.
    %int3_776 = torch.constant.int 3
    %none_777 = torch.constant.none
    %1433 = torch.aten.softmax.int %1432, %int3_776, %none_777 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; again the element type is unchanged.
    %int6_778 = torch.constant.int 6
    %none_779 = torch.constant.none
    %false_780 = torch.constant.bool false
    %1434 = torch.aten.to.dtype %1433, %int6_778, %false_780, %false_780, %none_779 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the normalized weights %1434 ([1,25,7,7]) to %1417 ([1,25,7,64]), then undo the
    // earlier layout change: swap dims 1 and 2 back to [1,7,25,64] and merge the last two
    // dimensions (25 * 64 = 1600) to obtain %1439 ([1,7,1600]).
    // NOTE(review): the shape literal %1437 is not referenced in the visible lines.
    %1435 = torch.aten.matmul %1434, %1417 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_781 = torch.constant.int 1
    %int2_782 = torch.constant.int 2
    %1436 = torch.aten.transpose.int %1435, %int1_781, %int2_782 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1437 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_783 = torch.constant.int 1
    %int7_784 = torch.constant.int 7
    %int1600_785 = torch.constant.int 1600
    %1438 = torch.prim.ListConstruct %int1_783, %int7_784, %int1600_785 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1439 = torch.aten.reshape %1436, %1438 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Per-token linear projection 1600 -> 1600 followed by a residual add:
    //   flatten %1439 to [7,1600], multiply by %114 ([1600,1600]), add bias %115 ([1600]),
    //   reshape to [1,7,1600] (%1447), then %1448 = %1447 + 1 * %1392.
    // NOTE(review): %1440, %1445 and %int0_788 are not referenced in the visible lines.
    %1440 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_786 = torch.constant.int 7
    %int1600_787 = torch.constant.int 1600
    %1441 = torch.prim.ListConstruct %int7_786, %int1600_787 : (!torch.int, !torch.int) -> !torch.list<int>
    %1442 = torch.aten.reshape %1439, %1441 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_788 = torch.constant.int 0
    %int1_789 = torch.constant.int 1
    %1443 = torch.aten.mm %1442, %114 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1444 = torch.aten.add.Tensor %1443, %115, %int1_789 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1445 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_790 = torch.constant.int 1
    %int7_791 = torch.constant.int 7
    %int1600_792 = torch.constant.int 1600
    %1446 = torch.prim.ListConstruct %int1_790, %int7_791, %int1600_792 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1447 = torch.aten.reshape %1444, %1446 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection back to %1392, the input of the preceding layer norm.
    %int1_793 = torch.constant.int 1
    %1448 = torch.aten.add.Tensor %1447, %1392, %int1_793 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %1448 over the trailing dimension (normalized shape [1600]),
    // with %116 / %117 in the weight / bias operand slots and epsilon = f32(1e-5).
    // Only %result0_796 is consumed in the visible part of the function.
    %float9.999990e-06_794 = torch.constant.float 9.9999997473787516E-6
    %int1600_795 = torch.constant.int 1600
    %1449 = torch.prim.ListConstruct %int1600_795 : (!torch.int) -> !torch.list<int>
    %result0_796, %result1_797, %result2_798 = torch.aten.native_layer_norm %1448, %1449, %116, %117, %float9.999990e-06_794 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Per-token linear projection 1600 -> 6400:
    //   flatten %result0_796 to [7,1600], multiply by %118 ([1600,6400]),
    //   add bias %119 ([6400]) with alpha = 1, reshape to [1,7,6400] (%1457).
    // NOTE(review): %1450, %1455 and %int0_801 are not referenced in the visible lines.
    %1450 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_799 = torch.constant.int 7
    %int1600_800 = torch.constant.int 1600
    %1451 = torch.prim.ListConstruct %int7_799, %int1600_800 : (!torch.int, !torch.int) -> !torch.list<int>
    %1452 = torch.aten.reshape %result0_796, %1451 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_801 = torch.constant.int 0
    %int1_802 = torch.constant.int 1
    %1453 = torch.aten.mm %1452, %118 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1454 = torch.aten.add.Tensor %1453, %119, %int1_802 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1455 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_803 = torch.constant.int 1
    %int7_804 = torch.constant.int 7
    %int6400_805 = torch.constant.int 6400
    %1456 = torch.prim.ListConstruct %int1_803, %int7_804, %int6400_805 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1457 = torch.aten.reshape %1454, %1456 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %1457, expanded into primitive ops:
    //   %1470 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh approximation of GELU; 0.797884583 is sqrt(2/pi) rounded to f32.
    // 0.5 * x
    %1458 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1459 = torch.aten.mul.Tensor %1457, %1458 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %1460 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1461 = torch.aten.pow.Tensor_Tensor %1457, %1460 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %1462 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1463 = torch.aten.mul.Tensor %1461, %1462 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_806 = torch.constant.int 1
    %1464 = torch.aten.add.Tensor %1457, %1463, %int1_806 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %1465 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1466 = torch.aten.mul.Tensor %1464, %1465 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1467 = torch.aten.tanh %1466 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by 0.5 * x
    %1468 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_807 = torch.constant.int 1
    %1469 = torch.aten.add.Tensor %1467, %1468, %int1_807 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1470 = torch.aten.mul.Tensor %1459, %1469 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Per-token linear projection 6400 -> 1600 followed by a residual add:
    //   flatten %1470 to [7,6400], multiply by %120 ([6400,1600]), add bias %121 ([1600]),
    //   reshape to [1,7,1600] (%1478), then %1479 = %1448 + 1 * %1478.
    // NOTE(review): %1471, %1476 and %int0_810 are not referenced in the visible lines.
    %1471 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_808 = torch.constant.int 7
    %int6400_809 = torch.constant.int 6400
    %1472 = torch.prim.ListConstruct %int7_808, %int6400_809 : (!torch.int, !torch.int) -> !torch.list<int>
    %1473 = torch.aten.reshape %1470, %1472 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_810 = torch.constant.int 0
    %int1_811 = torch.constant.int 1
    %1474 = torch.aten.mm %1473, %120 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1475 = torch.aten.add.Tensor %1474, %121, %int1_811 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1476 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_812 = torch.constant.int 1
    %int7_813 = torch.constant.int 7
    %int1600_814 = torch.constant.int 1600
    %1477 = torch.prim.ListConstruct %int1_812, %int7_813, %int1600_814 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1478 = torch.aten.reshape %1475, %1477 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection back to %1448, the input of the preceding layer norm.
    %int1_815 = torch.constant.int 1
    %1479 = torch.aten.add.Tensor %1448, %1478, %int1_815 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %1479 over the trailing dimension (normalized shape [1600]),
    // with %122 / %123 in the weight / bias operand slots and epsilon = f32(1e-5).
    // Only %result0_818 is consumed in the visible part of the function.
    %float9.999990e-06_816 = torch.constant.float 9.9999997473787516E-6
    %int1600_817 = torch.constant.int 1600
    %1480 = torch.prim.ListConstruct %int1600_817 : (!torch.int) -> !torch.list<int>
    %result0_818, %result1_819, %result2_820 = torch.aten.native_layer_norm %1479, %1480, %122, %123, %float9.999990e-06_816 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Per-token linear projection 1600 -> 4800:
    //   flatten %result0_818 to [7,1600], multiply by %124 ([1600,4800]),
    //   add bias %125 ([4800]) with alpha = 1, reshape to [1,7,4800] (%1488).
    // NOTE(review): %1481, %1486 and %int0_823 are not referenced in the visible lines;
    // the reshapes take their target shapes from the ListConstruct values.
    %1481 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_821 = torch.constant.int 7
    %int1600_822 = torch.constant.int 1600
    %1482 = torch.prim.ListConstruct %int7_821, %int1600_822 : (!torch.int, !torch.int) -> !torch.list<int>
    %1483 = torch.aten.reshape %result0_818, %1482 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_823 = torch.constant.int 0
    %int1_824 = torch.constant.int 1
    %1484 = torch.aten.mm %1483, %124 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1485 = torch.aten.add.Tensor %1484, %125, %int1_824 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1486 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_825 = torch.constant.int 1
    %int7_826 = torch.constant.int 7
    %int4800_827 = torch.constant.int 4800
    %1487 = torch.prim.ListConstruct %int1_825, %int7_826, %int4800_827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1488 = torch.aten.reshape %1485, %1487 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split %1488 ([1,7,4800]) along dimension 2 into three [1,7,1600] slices.
    // %1489 is a splat constant, so the size list is [1600, 1600, 1600].
    // NOTE(review): given the gpt2-xl file name, #0/#1/#2 are presumably the attention
    // query/key/value projections — confirm against the exporting model.
    %1489 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1490 = torch.prim.tolist(%1489) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_828 = torch.constant.int 2
    %1491 = torch.aten.split_with_sizes %1488, %1490, %int2_828 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1492:3 = torch.prim.ListUnpack %1491 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three split results is reshaped from [1,7,1600] to [1,7,25,64] and then
    // has dimensions 1 and 2 swapped, giving [1,25,7,64] (%1496, %1500, %1504).
    // NOTE(review): 25 x 64 presumably means 25 heads of width 64 — confirm.
    // NOTE(review): the shape literals %1493, %1497 and %1501 are not referenced in the
    // visible lines; the reshapes use the ListConstruct values instead.

    // Slice #0 -> %1496.
    %1493 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_829 = torch.constant.int 1
    %int7_830 = torch.constant.int 7
    %int25_831 = torch.constant.int 25
    %int64_832 = torch.constant.int 64
    %1494 = torch.prim.ListConstruct %int1_829, %int7_830, %int25_831, %int64_832 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1495 = torch.aten.reshape %1492#0, %1494 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_833 = torch.constant.int 1
    %int2_834 = torch.constant.int 2
    %1496 = torch.aten.transpose.int %1495, %int1_833, %int2_834 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %1500 (not consumed by any op in the visible lines).
    %1497 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_835 = torch.constant.int 1
    %int7_836 = torch.constant.int 7
    %int25_837 = torch.constant.int 25
    %int64_838 = torch.constant.int 64
    %1498 = torch.prim.ListConstruct %int1_835, %int7_836, %int25_837, %int64_838 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1499 = torch.aten.reshape %1492#1, %1498 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_839 = torch.constant.int 1
    %int2_840 = torch.constant.int 2
    %1500 = torch.aten.transpose.int %1499, %int1_839, %int2_840 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %1504.
    %1501 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_841 = torch.constant.int 1
    %int7_842 = torch.constant.int 7
    %int25_843 = torch.constant.int 25
    %int64_844 = torch.constant.int 64
    %1502 = torch.prim.ListConstruct %int1_841, %int7_842, %int25_843, %int64_844 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1503 = torch.aten.reshape %1492#2, %1502 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_845 = torch.constant.int 1
    %int2_846 = torch.constant.int 2
    %1504 = torch.aten.transpose.int %1503, %int1_845, %int2_846 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %1496 x transpose(%1499-heads), scaled by 64^0.5,
    // masked, softmaxed over the last dim, then multiplied with %1504.
    // NOTE(review): %1505 repeats the transpose already computed as %1500
    // (same input %1499, same dims 1 and 2).
    %int1_847 = torch.constant.int 1
    %int2_848 = torch.constant.int 2
    %1505 = torch.aten.transpose.int %1499, %int1_847, %int2_848 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the 64-wide axis.
    %int2_849 = torch.constant.int 2
    %int3_850 = torch.constant.int 3
    %1506 = torch.aten.transpose.int %1505, %int2_849, %int3_850 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // [1,25,7,64] x [1,25,64,7] -> [1,25,7,7] raw scores.
    %1507 = torch.aten.matmul %1496, %1506 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64 raised to the power 0.5, computed on scalar tensors.
    %1508 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1509 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1510 = torch.aten.pow.Tensor_Tensor %1508, %1509 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %1511 and %int0_851 have no use in this region.
    %1511 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_851 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0, then 0.0 + 1 * %1510; the sum carries the
    // scale factor as a rank-0 f32 tensor.
    %1512 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_852 = torch.constant.none
    %float0.000000e00_853 = torch.constant.float 0.000000e+00
    %1513 = torch.aten.full %1512, %float0.000000e00_853, %none_852, %none_852, %none_852, %none_852 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_854 = torch.constant.int 1
    %1514 = torch.aten.add.Tensor %1513, %1510, %int1_854 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Divide the scores by the scale factor (broadcast scalar).
    %1515 = torch.aten.div.Tensor %1507, %1514 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — confirm.
    %1516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // Cast with dtype code 6; operand and result types are both f32.
    %int6_855 = torch.constant.int 6
    %none_856 = torch.constant.none
    %false_857 = torch.constant.bool false
    %1517 = torch.aten.to.dtype %1515, %int6_855, %false_857, %false_857, %none_856 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32).
    %1518 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1519 = torch.aten.where.self %1516, %1517, %1518 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last axis of the [1,25,7,7] scores).
    %int3_858 = torch.constant.int 3
    %none_859 = torch.constant.none
    %1520 = torch.aten.softmax.int %1519, %int3_858, %none_859 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 cast (dtype code 6).
    %int6_860 = torch.constant.int 6
    %none_861 = torch.constant.none
    %false_862 = torch.constant.bool false
    %1521 = torch.aten.to.dtype %1520, %int6_860, %false_862, %false_862, %none_861 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %1522 = torch.aten.matmul %1521, %1504 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge heads and apply the output projection:
    // [1,25,7,64] -> [1,7,25,64] -> [1,7,1600] -> [7,1600], then
    // mm with %126 ([1600,1600]) plus %127 ([1600]), back to [1,7,1600],
    // and finally add %1479.
    // %126, %127 and %1479 are defined earlier in the function.
    // NOTE(review): %1479 is presumably the residual stream entering this
    // attention block — confirm.
    %int1_863 = torch.constant.int 1
    %int2_864 = torch.constant.int 2
    %1523 = torch.aten.transpose.int %1522, %int1_863, %int2_864 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // NOTE(review): the shape literals %1524, %1527 and %1532 have no use in
    // this region; each reshape uses the static ListConstruct that follows it.
    %1524 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_865 = torch.constant.int 1
    %int7_866 = torch.constant.int 7
    %int1600_867 = torch.constant.int 1600
    %1525 = torch.prim.ListConstruct %int1_865, %int7_866, %int1600_867 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1526 = torch.aten.reshape %1523, %1525 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Flatten to 2-D for aten.mm; the literal says [-1, 1600] but the list
    // actually passed to the reshape is the static [7, 1600].
    %1527 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_868 = torch.constant.int 7
    %int1600_869 = torch.constant.int 1600
    %1528 = torch.prim.ListConstruct %int7_868, %int1600_869 : (!torch.int, !torch.int) -> !torch.list<int>
    %1529 = torch.aten.reshape %1526, %1528 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_870 has no use in this region; %int1_871 is the alpha of the add.
    %int0_870 = torch.constant.int 0
    %int1_871 = torch.constant.int 1
    %1530 = torch.aten.mm %1529, %126 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1531 = torch.aten.add.Tensor %1530, %127, %int1_871 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the batch dim.
    %1532 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_872 = torch.constant.int 1
    %int7_873 = torch.constant.int 7
    %int1600_874 = torch.constant.int 1600
    %1533 = torch.prim.ListConstruct %int1_872, %int7_873, %int1600_874 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1534 = torch.aten.reshape %1531, %1533 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Elementwise add with alpha = 1.
    %int1_875 = torch.constant.int 1
    %1535 = torch.aten.add.Tensor %1534, %1479, %int1_875 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Feed-forward sub-block on %1535:
    //   layer norm over the last dim (1600) -> mm to 6400 + bias ->
    //   tanh-form GELU written out op by op -> mm back to 1600 + bias ->
    //   add to %1535.
    // %128..%133 are defined earlier in the function.
    // Layer norm: normalized shape [1600], %128 / %129 in the weight / bias
    // operand positions, eps = 9.9999997473787516E-6.
    // NOTE(review): the two [1,7,1] side results (%result1_879, %result2_880)
    // have no use in this region.
    %float9.999990e-06_876 = torch.constant.float 9.9999997473787516E-6
    %int1600_877 = torch.constant.int 1600
    %1536 = torch.prim.ListConstruct %int1600_877 : (!torch.int) -> !torch.list<int>
    %result0_878, %result1_879, %result2_880 = torch.aten.native_layer_norm %1535, %1536, %128, %129, %float9.999990e-06_876 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for aten.mm (shape literal %1537 is unused here;
    // the reshape takes the static list %1538).
    %1537 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_881 = torch.constant.int 7
    %int1600_882 = torch.constant.int 1600
    %1538 = torch.prim.ListConstruct %int7_881, %int1600_882 : (!torch.int, !torch.int) -> !torch.list<int>
    %1539 = torch.aten.reshape %result0_878, %1538 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Up-projection: [7,1600] x [1600,6400] + [6400].
    %int0_883 = torch.constant.int 0
    %int1_884 = torch.constant.int 1
    %1540 = torch.aten.mm %1539, %130 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1541 = torch.aten.add.Tensor %1540, %131, %int1_884 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1542 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_885 = torch.constant.int 1
    %int7_886 = torch.constant.int 7
    %int6400_887 = torch.constant.int 6400
    %1543 = torch.prim.ListConstruct %int1_885, %int7_886, %int6400_887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1544 = torch.aten.reshape %1541, %1543 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Activation, with x = %1544:
    //   %1557 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh-based GELU approximation.
    %1545 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1546 = torch.aten.mul.Tensor %1544, %1545 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1547 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1548 = torch.aten.pow.Tensor_Tensor %1544, %1547 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1549 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1550 = torch.aten.mul.Tensor %1548, %1549 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_888 = torch.constant.int 1
    %1551 = torch.aten.add.Tensor %1544, %1550, %int1_888 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1552 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1553 = torch.aten.mul.Tensor %1551, %1552 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1554 = torch.aten.tanh %1553 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1555 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_889 = torch.constant.int 1
    %1556 = torch.aten.add.Tensor %1554, %1555, %int1_889 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1557 = torch.aten.mul.Tensor %1546, %1556 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Down-projection: flatten to [7,6400], mm with %132 ([6400,1600]) + %133.
    %1558 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_890 = torch.constant.int 7
    %int6400_891 = torch.constant.int 6400
    %1559 = torch.prim.ListConstruct %int7_890, %int6400_891 : (!torch.int, !torch.int) -> !torch.list<int>
    %1560 = torch.aten.reshape %1557, %1559 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_892 = torch.constant.int 0
    %int1_893 = torch.constant.int 1
    %1561 = torch.aten.mm %1560, %132 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1562 = torch.aten.add.Tensor %1561, %133, %int1_893 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1563 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_894 = torch.constant.int 1
    %int7_895 = torch.constant.int 7
    %int1600_896 = torch.constant.int 1600
    %1564 = torch.prim.ListConstruct %int1_894, %int7_895, %int1600_896 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1565 = torch.aten.reshape %1562, %1564 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add the feed-forward output back onto its pre-norm input %1535.
    %int1_897 = torch.constant.int 1
    %1566 = torch.aten.add.Tensor %1535, %1565, %int1_897 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Start of the next attention sub-block, operating on %1566:
    //   layer norm -> fused [1600 -> 4800] projection -> split into three
    //   [1,7,1600] slices -> each viewed as [1,25,7,64].
    // %134..%137 are defined earlier in the function.
    // NOTE(review): slices #0/#1/#2 are presumably query/key/value — confirm.
    // Layer norm: normalized shape [1600], eps = 9.9999997473787516E-6.
    %float9.999990e-06_898 = torch.constant.float 9.9999997473787516E-6
    %int1600_899 = torch.constant.int 1600
    %1567 = torch.prim.ListConstruct %int1600_899 : (!torch.int) -> !torch.list<int>
    %result0_900, %result1_901, %result2_902 = torch.aten.native_layer_norm %1566, %1567, %134, %135, %float9.999990e-06_898 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for aten.mm.
    %1568 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_903 = torch.constant.int 7
    %int1600_904 = torch.constant.int 1600
    %1569 = torch.prim.ListConstruct %int7_903, %int1600_904 : (!torch.int, !torch.int) -> !torch.list<int>
    %1570 = torch.aten.reshape %result0_900, %1569 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Fused projection: [7,1600] x [1600,4800] + [4800].
    %int0_905 = torch.constant.int 0
    %int1_906 = torch.constant.int 1
    %1571 = torch.aten.mm %1570, %136 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1572 = torch.aten.add.Tensor %1571, %137, %int1_906 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1573 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_907 = torch.constant.int 1
    %int7_908 = torch.constant.int 7
    %int4800_909 = torch.constant.int 4800
    %1574 = torch.prim.ListConstruct %int1_907, %int7_908, %int4800_909 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1575 = torch.aten.reshape %1572, %1574 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split along dim 2 with sizes [1600, 1600, 1600] (splat literal).
    %1576 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1577 = torch.prim.tolist(%1576) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_910 = torch.constant.int 2
    %1578 = torch.aten.split_with_sizes %1575, %1577, %int2_910 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1579:3 = torch.prim.ListUnpack %1578 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0 -> [1,7,25,64] -> [1,25,7,64].
    %1580 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_911 = torch.constant.int 1
    %int7_912 = torch.constant.int 7
    %int25_913 = torch.constant.int 25
    %int64_914 = torch.constant.int 64
    %1581 = torch.prim.ListConstruct %int1_911, %int7_912, %int25_913, %int64_914 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1582 = torch.aten.reshape %1579#0, %1581 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_915 = torch.constant.int 1
    %int2_916 = torch.constant.int 2
    %1583 = torch.aten.transpose.int %1582, %int1_915, %int2_916 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> [1,7,25,64] -> [1,25,7,64].
    %1584 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_917 = torch.constant.int 1
    %int7_918 = torch.constant.int 7
    %int25_919 = torch.constant.int 25
    %int64_920 = torch.constant.int 64
    %1585 = torch.prim.ListConstruct %int1_917, %int7_918, %int25_919, %int64_920 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1586 = torch.aten.reshape %1579#1, %1585 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_921 = torch.constant.int 1
    %int2_922 = torch.constant.int 2
    %1587 = torch.aten.transpose.int %1586, %int1_921, %int2_922 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> [1,7,25,64] -> [1,25,7,64].
    %1588 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_923 = torch.constant.int 1
    %int7_924 = torch.constant.int 7
    %int25_925 = torch.constant.int 25
    %int64_926 = torch.constant.int 64
    %1589 = torch.prim.ListConstruct %int1_923, %int7_924, %int25_925, %int64_926 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1590 = torch.aten.reshape %1579#2, %1589 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_927 = torch.constant.int 1
    %int2_928 = torch.constant.int 2
    %1591 = torch.aten.transpose.int %1590, %int1_927, %int2_928 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %1583 x transpose(%1586-heads), divided by
    // 64^0.5, masked, softmaxed over dim 3, then multiplied with %1591.
    // NOTE(review): %1592 repeats the transpose already computed as %1587
    // (same input %1586, same dims 1 and 2).
    %int1_929 = torch.constant.int 1
    %int2_930 = torch.constant.int 2
    %1592 = torch.aten.transpose.int %1586, %int1_929, %int2_930 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims: [1,25,7,64] -> [1,25,64,7].
    %int2_931 = torch.constant.int 2
    %int3_932 = torch.constant.int 3
    %1593 = torch.aten.transpose.int %1592, %int2_931, %int3_932 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // [1,25,7,64] x [1,25,64,7] -> [1,25,7,7] raw scores.
    %1594 = torch.aten.matmul %1583, %1593 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64 raised to the power 0.5, computed on scalar tensors.
    %1595 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1596 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1597 = torch.aten.pow.Tensor_Tensor %1595, %1596 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %1598 and %int0_933 have no use in this region.
    %1598 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_933 = torch.constant.int 0
    // Rank-0 zero tensor, then 0.0 + 1 * %1597.
    %1599 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_934 = torch.constant.none
    %float0.000000e00_935 = torch.constant.float 0.000000e+00
    %1600 = torch.aten.full %1599, %float0.000000e00_935, %none_934, %none_934, %none_934, %none_934 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_936 = torch.constant.int 1
    %1601 = torch.aten.add.Tensor %1600, %1597, %int1_936 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Divide the scores by the scale factor (broadcast scalar).
    %1602 = torch.aten.div.Tensor %1594, %1601 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably the causal mask — confirm.
    %1603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // Cast with dtype code 6; operand and result types are both f32.
    %int6_937 = torch.constant.int 6
    %none_938 = torch.constant.none
    %false_939 = torch.constant.bool false
    %1604 = torch.aten.to.dtype %1602, %int6_937, %false_939, %false_939, %none_938 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere use -3.40282347E+38.
    %1605 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1606 = torch.aten.where.self %1603, %1604, %1605 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3.
    %int3_940 = torch.constant.int 3
    %none_941 = torch.constant.none
    %1607 = torch.aten.softmax.int %1606, %int3_940, %none_941 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 cast (dtype code 6).
    %int6_942 = torch.constant.int 6
    %none_943 = torch.constant.none
    %false_944 = torch.constant.bool false
    %1608 = torch.aten.to.dtype %1607, %int6_942, %false_944, %false_944, %none_943 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %1609 = torch.aten.matmul %1608, %1591 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge heads and apply the output projection:
    // [1,25,7,64] -> [1,7,25,64] -> [1,7,1600] -> [7,1600], then
    // mm with %138 ([1600,1600]) plus %139 ([1600]), back to [1,7,1600],
    // and finally add %1566 (the tensor that fed this sub-block's layer norm).
    // %138 and %139 are defined earlier in the function.
    %int1_945 = torch.constant.int 1
    %int2_946 = torch.constant.int 2
    %1610 = torch.aten.transpose.int %1609, %int1_945, %int2_946 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // NOTE(review): the shape literals %1611, %1614 and %1619 have no use in
    // this region; each reshape uses the static ListConstruct that follows it.
    %1611 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_947 = torch.constant.int 1
    %int7_948 = torch.constant.int 7
    %int1600_949 = torch.constant.int 1600
    %1612 = torch.prim.ListConstruct %int1_947, %int7_948, %int1600_949 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1613 = torch.aten.reshape %1610, %1612 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Flatten to 2-D for aten.mm.
    %1614 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_950 = torch.constant.int 7
    %int1600_951 = torch.constant.int 1600
    %1615 = torch.prim.ListConstruct %int7_950, %int1600_951 : (!torch.int, !torch.int) -> !torch.list<int>
    %1616 = torch.aten.reshape %1613, %1615 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_952 has no use in this region; %int1_953 is the alpha of the add.
    %int0_952 = torch.constant.int 0
    %int1_953 = torch.constant.int 1
    %1617 = torch.aten.mm %1616, %138 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1618 = torch.aten.add.Tensor %1617, %139, %int1_953 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the batch dim.
    %1619 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_954 = torch.constant.int 1
    %int7_955 = torch.constant.int 7
    %int1600_956 = torch.constant.int 1600
    %1620 = torch.prim.ListConstruct %int1_954, %int7_955, %int1600_956 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1621 = torch.aten.reshape %1618, %1620 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Elementwise add with alpha = 1.
    %int1_957 = torch.constant.int 1
    %1622 = torch.aten.add.Tensor %1621, %1566, %int1_957 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Feed-forward sub-block on %1622:
    //   layer norm over the last dim (1600) -> mm to 6400 + bias ->
    //   tanh-form GELU written out op by op -> mm back to 1600 + bias ->
    //   add to %1622.
    // %140..%145 are defined earlier in the function.
    // Layer norm: normalized shape [1600], %140 / %141 in the weight / bias
    // operand positions, eps = 9.9999997473787516E-6.
    // NOTE(review): the two [1,7,1] side results (%result1_961, %result2_962)
    // have no use in this region.
    %float9.999990e-06_958 = torch.constant.float 9.9999997473787516E-6
    %int1600_959 = torch.constant.int 1600
    %1623 = torch.prim.ListConstruct %int1600_959 : (!torch.int) -> !torch.list<int>
    %result0_960, %result1_961, %result2_962 = torch.aten.native_layer_norm %1622, %1623, %140, %141, %float9.999990e-06_958 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for aten.mm.
    %1624 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_963 = torch.constant.int 7
    %int1600_964 = torch.constant.int 1600
    %1625 = torch.prim.ListConstruct %int7_963, %int1600_964 : (!torch.int, !torch.int) -> !torch.list<int>
    %1626 = torch.aten.reshape %result0_960, %1625 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Up-projection: [7,1600] x [1600,6400] + [6400].
    %int0_965 = torch.constant.int 0
    %int1_966 = torch.constant.int 1
    %1627 = torch.aten.mm %1626, %142 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1628 = torch.aten.add.Tensor %1627, %143, %int1_966 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1629 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_967 = torch.constant.int 1
    %int7_968 = torch.constant.int 7
    %int6400_969 = torch.constant.int 6400
    %1630 = torch.prim.ListConstruct %int1_967, %int7_968, %int6400_969 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1631 = torch.aten.reshape %1628, %1630 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Activation, with x = %1631:
    //   %1644 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh-based GELU approximation.
    %1632 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1633 = torch.aten.mul.Tensor %1631, %1632 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1634 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1635 = torch.aten.pow.Tensor_Tensor %1631, %1634 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1636 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1637 = torch.aten.mul.Tensor %1635, %1636 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_970 = torch.constant.int 1
    %1638 = torch.aten.add.Tensor %1631, %1637, %int1_970 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1639 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1640 = torch.aten.mul.Tensor %1638, %1639 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1641 = torch.aten.tanh %1640 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1642 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_971 = torch.constant.int 1
    %1643 = torch.aten.add.Tensor %1641, %1642, %int1_971 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1644 = torch.aten.mul.Tensor %1633, %1643 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Down-projection: flatten to [7,6400], mm with %144 ([6400,1600]) + %145.
    %1645 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_972 = torch.constant.int 7
    %int6400_973 = torch.constant.int 6400
    %1646 = torch.prim.ListConstruct %int7_972, %int6400_973 : (!torch.int, !torch.int) -> !torch.list<int>
    %1647 = torch.aten.reshape %1644, %1646 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_974 = torch.constant.int 0
    %int1_975 = torch.constant.int 1
    %1648 = torch.aten.mm %1647, %144 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1649 = torch.aten.add.Tensor %1648, %145, %int1_975 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1650 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_976 = torch.constant.int 1
    %int7_977 = torch.constant.int 7
    %int1600_978 = torch.constant.int 1600
    %1651 = torch.prim.ListConstruct %int1_976, %int7_977, %int1600_978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1652 = torch.aten.reshape %1649, %1651 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add the feed-forward output back onto its pre-norm input %1622.
    %int1_979 = torch.constant.int 1
    %1653 = torch.aten.add.Tensor %1622, %1652, %int1_979 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Start of the next attention sub-block, operating on %1653:
    //   layer norm -> fused [1600 -> 4800] projection -> split into three
    //   [1,7,1600] slices -> each viewed as [1,25,7,64].
    // %146..%149 are defined earlier in the function.
    // NOTE(review): slices #0/#1/#2 are presumably query/key/value — confirm.
    // Layer norm: normalized shape [1600], eps = 9.9999997473787516E-6.
    %float9.999990e-06_980 = torch.constant.float 9.9999997473787516E-6
    %int1600_981 = torch.constant.int 1600
    %1654 = torch.prim.ListConstruct %int1600_981 : (!torch.int) -> !torch.list<int>
    %result0_982, %result1_983, %result2_984 = torch.aten.native_layer_norm %1653, %1654, %146, %147, %float9.999990e-06_980 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for aten.mm.
    %1655 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_985 = torch.constant.int 7
    %int1600_986 = torch.constant.int 1600
    %1656 = torch.prim.ListConstruct %int7_985, %int1600_986 : (!torch.int, !torch.int) -> !torch.list<int>
    %1657 = torch.aten.reshape %result0_982, %1656 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Fused projection: [7,1600] x [1600,4800] + [4800].
    %int0_987 = torch.constant.int 0
    %int1_988 = torch.constant.int 1
    %1658 = torch.aten.mm %1657, %148 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1659 = torch.aten.add.Tensor %1658, %149, %int1_988 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1660 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_989 = torch.constant.int 1
    %int7_990 = torch.constant.int 7
    %int4800_991 = torch.constant.int 4800
    %1661 = torch.prim.ListConstruct %int1_989, %int7_990, %int4800_991 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1662 = torch.aten.reshape %1659, %1661 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split along dim 2 with sizes [1600, 1600, 1600] (splat literal).
    %1663 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1664 = torch.prim.tolist(%1663) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_992 = torch.constant.int 2
    %1665 = torch.aten.split_with_sizes %1662, %1664, %int2_992 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1666:3 = torch.prim.ListUnpack %1665 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0 -> [1,7,25,64] -> [1,25,7,64].
    %1667 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_993 = torch.constant.int 1
    %int7_994 = torch.constant.int 7
    %int25_995 = torch.constant.int 25
    %int64_996 = torch.constant.int 64
    %1668 = torch.prim.ListConstruct %int1_993, %int7_994, %int25_995, %int64_996 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1669 = torch.aten.reshape %1666#0, %1668 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_997 = torch.constant.int 1
    %int2_998 = torch.constant.int 2
    %1670 = torch.aten.transpose.int %1669, %int1_997, %int2_998 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> [1,7,25,64] -> [1,25,7,64].
    %1671 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_999 = torch.constant.int 1
    %int7_1000 = torch.constant.int 7
    %int25_1001 = torch.constant.int 25
    %int64_1002 = torch.constant.int 64
    %1672 = torch.prim.ListConstruct %int1_999, %int7_1000, %int25_1001, %int64_1002 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1673 = torch.aten.reshape %1666#1, %1672 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1003 = torch.constant.int 1
    %int2_1004 = torch.constant.int 2
    %1674 = torch.aten.transpose.int %1673, %int1_1003, %int2_1004 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> [1,7,25,64] -> [1,25,7,64].
    %1675 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1005 = torch.constant.int 1
    %int7_1006 = torch.constant.int 7
    %int25_1007 = torch.constant.int 25
    %int64_1008 = torch.constant.int 64
    %1676 = torch.prim.ListConstruct %int1_1005, %int7_1006, %int25_1007, %int64_1008 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1677 = torch.aten.reshape %1666#2, %1676 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1009 = torch.constant.int 1
    %int2_1010 = torch.constant.int 2
    %1678 = torch.aten.transpose.int %1677, %int1_1009, %int2_1010 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1011 = torch.constant.int 1
    %int2_1012 = torch.constant.int 2
    %1679 = torch.aten.transpose.int %1673, %int1_1011, %int2_1012 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1013 = torch.constant.int 2
    %int3_1014 = torch.constant.int 3
    %1680 = torch.aten.transpose.int %1679, %int2_1013, %int3_1014 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %1681 = torch.aten.matmul %1670, %1680 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale the [1,25,7,7] scores in %1681 by dividing by (0.0 + 64.0^0.5).
    // The divisor is built from rank-0 tensor ops rather than a folded constant.
    %1682 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1683 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1684 = torch.aten.pow.Tensor_Tensor %1682, %1683 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %1685 and %int0_1015 are not referenced in the visible part of the function.
    %1685 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1015 = torch.constant.int 0
    // Empty size list -> rank-0 tensor filled with 0.0 (dtype/layout/device/
    // pin_memory arguments are all none).
    %1686 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1016 = torch.constant.none
    %float0.000000e00_1017 = torch.constant.float 0.000000e+00
    %1687 = torch.aten.full %1686, %float0.000000e00_1017, %none_1016, %none_1016, %none_1016, %none_1016 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    // 0.0 + 1 * pow-result: numerically the same value as %1684.
    %int1_1018 = torch.constant.int 1
    %1688 = torch.aten.add.Tensor %1687, %1684, %int1_1018 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // The rank-0 divisor broadcasts over the full [1,25,7,7] score tensor.
    %1689 = torch.aten.div.Tensor %1681, %1688 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores %1689, softmax them, and multiply by %1678.
    // %1690 is a boolean [1,1,7,7] literal whose contents are elided in this
    // dump (presumably a causal mask -- confirm against the un-elided model).
    %1690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the
    // element type does not change.
    %int6_1019 = torch.constant.int 6
    %none_1020 = torch.constant.none
    %false_1021 = torch.constant.bool false
    %1691 = torch.aten.to.dtype %1689, %int6_1019, %false_1021, %false_1021, %none_1020 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32 value).
    %1692 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1693 = torch.aten.where.self %1690, %1691, %1692 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis); the dtype argument is none.
    %int3_1022 = torch.constant.int 3
    %none_1023 = torch.constant.none
    %1694 = torch.aten.softmax.int %1693, %int3_1022, %none_1023 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype, again leaving the element type unchanged.
    %int6_1024 = torch.constant.int 6
    %none_1025 = torch.constant.none
    %false_1026 = torch.constant.bool false
    %1695 = torch.aten.to.dtype %1694, %int6_1024, %false_1026, %false_1026, %none_1025 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weights [1,25,7,7] x %1678 [1,25,7,64] -> per-head output [1,25,7,64].
    %1696 = torch.aten.matmul %1695, %1678 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25 heads of %1696 back into a 1600-wide feature axis, apply the
    // [1600,1600] matrix %150 plus the [1600] bias %151, and add %1653.
    // %150, %151 and %1653 are defined above this chunk.
    // Swap dims 1 and 2: [1,25,7,64] -> [1,7,25,64].
    %int1_1027 = torch.constant.int 1
    %int2_1028 = torch.constant.int 2
    %1697 = torch.aten.transpose.int %1696, %int1_1027, %int2_1028 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %1698 carries the target shape as a tensor literal but is not referenced
    // in the visible code; the reshape takes its shape from the int list %1699.
    %1698 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1029 = torch.constant.int 1
    %int7_1030 = torch.constant.int 7
    %int1600_1031 = torch.constant.int 1600
    %1699 = torch.prim.ListConstruct %int1_1029, %int7_1030, %int1600_1031 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1700 = torch.aten.reshape %1697, %1699 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Flatten to 2-D [7,1600] for torch.aten.mm. The [-1, 1600] literal %1701 is
    // not referenced in the visible code; the list uses the static value 7.
    %1701 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1032 = torch.constant.int 7
    %int1600_1033 = torch.constant.int 1600
    %1702 = torch.prim.ListConstruct %int7_1032, %int1600_1033 : (!torch.int, !torch.int) -> !torch.list<int>
    %1703 = torch.aten.reshape %1700, %1702 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1034 is not referenced in the visible code; %int1_1035 is the alpha
    // of the bias add.
    %int0_1034 = torch.constant.int 0
    %int1_1035 = torch.constant.int 1
    %1704 = torch.aten.mm %1703, %150 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1705 = torch.aten.add.Tensor %1704, %151, %int1_1035 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the leading batch dim: [7,1600] -> [1,7,1600].
    %1706 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1036 = torch.constant.int 1
    %int7_1037 = torch.constant.int 7
    %int1600_1038 = torch.constant.int 1600
    %1707 = torch.prim.ListConstruct %int1_1036, %int7_1037, %int1600_1038 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1708 = torch.aten.reshape %1705, %1707 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Elementwise add with alpha = 1 (presumably the residual connection --
    // %1653 is defined above this chunk; confirm).
    %int1_1039 = torch.constant.int 1
    %1709 = torch.aten.add.Tensor %1708, %1653, %int1_1039 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %1709 over its last dim (size 1600) with weight %152, bias
    // %153 and eps 9.9999997473787516E-6 (1e-5 rounded to f32), then project
    // 1600 -> 6400 with matrix %154 and bias %155 (all defined above this chunk).
    %float9.999990e-06_1040 = torch.constant.float 9.9999997473787516E-6
    %int1600_1041 = torch.constant.int 1600
    %1710 = torch.prim.ListConstruct %int1600_1041 : (!torch.int) -> !torch.list<int>
    // native_layer_norm yields three results; only %result0_1042 is consumed in
    // the visible code (the two [1,7,1] results are not referenced here).
    %result0_1042, %result1_1043, %result2_1044 = torch.aten.native_layer_norm %1709, %1710, %152, %153, %float9.999990e-06_1040 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for torch.aten.mm; %1711 is not referenced in the
    // visible code.
    %1711 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1045 = torch.constant.int 7
    %int1600_1046 = torch.constant.int 1600
    %1712 = torch.prim.ListConstruct %int7_1045, %int1600_1046 : (!torch.int, !torch.int) -> !torch.list<int>
    %1713 = torch.aten.reshape %result0_1042, %1712 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1047 is not referenced in the visible code; %int1_1048 is the alpha
    // of the bias add.
    %int0_1047 = torch.constant.int 0
    %int1_1048 = torch.constant.int 1
    %1714 = torch.aten.mm %1713, %154 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1715 = torch.aten.add.Tensor %1714, %155, %int1_1048 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Restore the leading batch dim: [7,6400] -> [1,7,6400].
    %1716 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1049 = torch.constant.int 1
    %int7_1050 = torch.constant.int 7
    %int6400_1051 = torch.constant.int 6400
    %1717 = torch.prim.ListConstruct %int1_1049, %int7_1050, %int6400_1051 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1718 = torch.aten.reshape %1715, %1717 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %1718, spelled out op by op:
    //   %1731 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh-approximation form of GELU (0.797884583 ~= sqrt(2/pi)).
    // 0.5 * x
    %1719 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1720 = torch.aten.mul.Tensor %1718, %1719 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3, via a tensor-tensor pow with a rank-0 exponent
    %1721 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1722 = torch.aten.pow.Tensor_Tensor %1718, %1721 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %1723 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1724 = torch.aten.mul.Tensor %1722, %1723 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3 (alpha = 1)
    %int1_1052 = torch.constant.int 1
    %1725 = torch.aten.add.Tensor %1718, %1724, %int1_1052 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Scale by 0.797884583 and take tanh
    %1726 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1727 = torch.aten.mul.Tensor %1725, %1726 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1728 = torch.aten.tanh %1727 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by the 0.5 * x term computed first
    %1729 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1053 = torch.constant.int 1
    %1730 = torch.aten.add.Tensor %1728, %1729, %int1_1053 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1731 = torch.aten.mul.Tensor %1720, %1730 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Project the activation %1731 from 6400 back to 1600 features with matrix
    // %156 and bias %157 (defined above this chunk), then add it to %1709.
    // Flatten to [7,6400] for torch.aten.mm; %1732 is not referenced in the
    // visible code.
    %1732 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1054 = torch.constant.int 7
    %int6400_1055 = torch.constant.int 6400
    %1733 = torch.prim.ListConstruct %int7_1054, %int6400_1055 : (!torch.int, !torch.int) -> !torch.list<int>
    %1734 = torch.aten.reshape %1731, %1733 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_1056 is not referenced in the visible code; %int1_1057 is the alpha
    // of the bias add.
    %int0_1056 = torch.constant.int 0
    %int1_1057 = torch.constant.int 1
    %1735 = torch.aten.mm %1734, %156 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1736 = torch.aten.add.Tensor %1735, %157, %int1_1057 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the leading batch dim: [7,1600] -> [1,7,1600].
    %1737 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1058 = torch.constant.int 1
    %int7_1059 = torch.constant.int 7
    %int1600_1060 = torch.constant.int 1600
    %1738 = torch.prim.ListConstruct %int1_1058, %int7_1059, %int1600_1060 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1739 = torch.aten.reshape %1736, %1738 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %1709 (the input that was layer-normalized before this projection chain)
    // plus the projected result, alpha = 1.
    %int1_1061 = torch.constant.int 1
    %1740 = torch.aten.add.Tensor %1709, %1739, %int1_1061 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %1740 over its last dim (size 1600) with weight %158 and
    // bias %159, project 1600 -> 4800 with matrix %160 and bias %161 (all defined
    // above this chunk), and split the result into three [1,7,1600] slices.
    %float9.999990e-06_1062 = torch.constant.float 9.9999997473787516E-6
    %int1600_1063 = torch.constant.int 1600
    %1741 = torch.prim.ListConstruct %int1600_1063 : (!torch.int) -> !torch.list<int>
    // Only %result0_1064 is consumed in the visible code.
    %result0_1064, %result1_1065, %result2_1066 = torch.aten.native_layer_norm %1740, %1741, %158, %159, %float9.999990e-06_1062 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for torch.aten.mm; %1742 is not referenced in the
    // visible code.
    %1742 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1067 = torch.constant.int 7
    %int1600_1068 = torch.constant.int 1600
    %1743 = torch.prim.ListConstruct %int7_1067, %int1600_1068 : (!torch.int, !torch.int) -> !torch.list<int>
    %1744 = torch.aten.reshape %result0_1064, %1743 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1069 is not referenced in the visible code; %int1_1070 is the alpha
    // of the bias add.
    %int0_1069 = torch.constant.int 0
    %int1_1070 = torch.constant.int 1
    %1745 = torch.aten.mm %1744, %160 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1746 = torch.aten.add.Tensor %1745, %161, %int1_1070 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Restore the leading batch dim: [7,4800] -> [1,7,4800].
    %1747 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1071 = torch.constant.int 1
    %int7_1072 = torch.constant.int 7
    %int4800_1073 = torch.constant.int 4800
    %1748 = torch.prim.ListConstruct %int1_1071, %int7_1072, %int4800_1073 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1749 = torch.aten.reshape %1746, %1748 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three elements, each 1600. Converted to an int list, it is
    // the size list for splitting dim 2 (4800 = 3 x 1600).
    %1750 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1751 = torch.prim.tolist(%1750) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1074 = torch.constant.int 2
    %1752 = torch.aten.split_with_sizes %1749, %1751, %int2_1074 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    // Unpack the list into three SSA results %1753#0, %1753#1, %1753#2.
    %1753:3 = torch.prim.ListUnpack %1752 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Give each of the three slices %1753#0/#1/#2 a head axis: reshape
    // [1,7,1600] -> [1,7,25,64], then swap dims 1 and 2 -> [1,25,7,64].
    // (Presumably query, key and value in that order, judging by how the
    // results feed the two matmuls that follow -- confirm against the model.)
    // The [1, 7, 25, 64] tensor literals (%1754, %1758, %1762) are not
    // referenced in the visible code; the reshapes use the int lists instead.
    // Slice #0 -> %1757.
    %1754 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1075 = torch.constant.int 1
    %int7_1076 = torch.constant.int 7
    %int25_1077 = torch.constant.int 25
    %int64_1078 = torch.constant.int 64
    %1755 = torch.prim.ListConstruct %int1_1075, %int7_1076, %int25_1077, %int64_1078 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1756 = torch.aten.reshape %1753#0, %1755 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1079 = torch.constant.int 1
    %int2_1080 = torch.constant.int 2
    %1757 = torch.aten.transpose.int %1756, %int1_1079, %int2_1080 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %1761. %1761 is not consumed in the visible code; the function
    // returns many [1,25,7,64] tensors, so it may be one of the outputs --
    // confirm at the return op.
    %1758 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1081 = torch.constant.int 1
    %int7_1082 = torch.constant.int 7
    %int25_1083 = torch.constant.int 25
    %int64_1084 = torch.constant.int 64
    %1759 = torch.prim.ListConstruct %int1_1081, %int7_1082, %int25_1083, %int64_1084 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1760 = torch.aten.reshape %1753#1, %1759 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1085 = torch.constant.int 1
    %int2_1086 = torch.constant.int 2
    %1761 = torch.aten.transpose.int %1760, %int1_1085, %int2_1086 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %1765.
    %1762 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1087 = torch.constant.int 1
    %int7_1088 = torch.constant.int 7
    %int25_1089 = torch.constant.int 25
    %int64_1090 = torch.constant.int 64
    %1763 = torch.prim.ListConstruct %int1_1087, %int7_1088, %int25_1089, %int64_1090 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1764 = torch.aten.reshape %1753#2, %1763 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1091 = torch.constant.int 1
    %int2_1092 = torch.constant.int 2
    %1765 = torch.aten.transpose.int %1764, %int1_1091, %int2_1092 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Build the right-hand operand of the score matmul from %1760 and multiply.
    // This transpose has the same operand and dims as the one producing %1761,
    // but is emitted as a separate op.
    %int1_1093 = torch.constant.int 1
    %int2_1094 = torch.constant.int 2
    %1766 = torch.aten.transpose.int %1760, %int1_1093, %int2_1094 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims: [1,25,7,64] -> [1,25,64,7].
    %int2_1095 = torch.constant.int 2
    %int3_1096 = torch.constant.int 3
    %1767 = torch.aten.transpose.int %1766, %int2_1095, %int3_1096 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul [1,25,7,64] x [1,25,64,7] -> [1,25,7,7] raw scores.
    %1768 = torch.aten.matmul %1757, %1767 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale the [1,25,7,7] scores in %1768 by dividing by (0.0 + 64.0^0.5).
    // The divisor is built from rank-0 tensor ops rather than a folded constant.
    %1769 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1770 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1771 = torch.aten.pow.Tensor_Tensor %1769, %1770 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %1772 and %int0_1097 are not referenced in the visible part of the function.
    %1772 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1097 = torch.constant.int 0
    // Empty size list -> rank-0 tensor filled with 0.0.
    %1773 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1098 = torch.constant.none
    %float0.000000e00_1099 = torch.constant.float 0.000000e+00
    %1774 = torch.aten.full %1773, %float0.000000e00_1099, %none_1098, %none_1098, %none_1098, %none_1098 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    // 0.0 + 1 * pow-result: numerically the same value as %1771.
    %int1_1100 = torch.constant.int 1
    %1775 = torch.aten.add.Tensor %1774, %1771, %int1_1100 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // The rank-0 divisor broadcasts over the full [1,25,7,7] score tensor.
    %1776 = torch.aten.div.Tensor %1768, %1775 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores %1776, softmax them, and multiply by %1765.
    // %1777 is a boolean [1,1,7,7] literal whose contents are elided in this
    // dump (presumably a causal mask -- confirm against the un-elided model).
    %1777 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the
    // element type does not change.
    %int6_1101 = torch.constant.int 6
    %none_1102 = torch.constant.none
    %false_1103 = torch.constant.bool false
    %1778 = torch.aten.to.dtype %1776, %int6_1101, %false_1103, %false_1103, %none_1102 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32 value).
    %1779 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1780 = torch.aten.where.self %1777, %1778, %1779 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis); the dtype argument is none.
    %int3_1104 = torch.constant.int 3
    %none_1105 = torch.constant.none
    %1781 = torch.aten.softmax.int %1780, %int3_1104, %none_1105 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype, again leaving the element type unchanged.
    %int6_1106 = torch.constant.int 6
    %none_1107 = torch.constant.none
    %false_1108 = torch.constant.bool false
    %1782 = torch.aten.to.dtype %1781, %int6_1106, %false_1108, %false_1108, %none_1107 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weights [1,25,7,7] x %1765 [1,25,7,64] -> per-head output [1,25,7,64].
    %1783 = torch.aten.matmul %1782, %1765 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25 heads of %1783 back into a 1600-wide feature axis, apply the
    // [1600,1600] matrix %162 plus the [1600] bias %163 (defined above this
    // chunk), and add %1740.
    // Swap dims 1 and 2: [1,25,7,64] -> [1,7,25,64].
    %int1_1109 = torch.constant.int 1
    %int2_1110 = torch.constant.int 2
    %1784 = torch.aten.transpose.int %1783, %int1_1109, %int2_1110 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %1785 carries the target shape as a tensor literal but is not referenced
    // in the visible code; the reshape takes its shape from the int list %1786.
    %1785 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1111 = torch.constant.int 1
    %int7_1112 = torch.constant.int 7
    %int1600_1113 = torch.constant.int 1600
    %1786 = torch.prim.ListConstruct %int1_1111, %int7_1112, %int1600_1113 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1787 = torch.aten.reshape %1784, %1786 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Flatten to 2-D [7,1600] for torch.aten.mm. The [-1, 1600] literal %1788 is
    // not referenced in the visible code; the list uses the static value 7.
    %1788 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1114 = torch.constant.int 7
    %int1600_1115 = torch.constant.int 1600
    %1789 = torch.prim.ListConstruct %int7_1114, %int1600_1115 : (!torch.int, !torch.int) -> !torch.list<int>
    %1790 = torch.aten.reshape %1787, %1789 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1116 is not referenced in the visible code; %int1_1117 is the alpha
    // of the bias add.
    %int0_1116 = torch.constant.int 0
    %int1_1117 = torch.constant.int 1
    %1791 = torch.aten.mm %1790, %162 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1792 = torch.aten.add.Tensor %1791, %163, %int1_1117 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the leading batch dim: [7,1600] -> [1,7,1600].
    %1793 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1118 = torch.constant.int 1
    %int7_1119 = torch.constant.int 7
    %int1600_1120 = torch.constant.int 1600
    %1794 = torch.prim.ListConstruct %int1_1118, %int7_1119, %int1600_1120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1795 = torch.aten.reshape %1792, %1794 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add %1740 (the tensor that was layer-normalized ahead of this attention
    // computation) with alpha = 1.
    %int1_1121 = torch.constant.int 1
    %1796 = torch.aten.add.Tensor %1795, %1740, %int1_1121 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %1796 over its last dim (size 1600) with weight %164, bias
    // %165 and eps 9.9999997473787516E-6 (1e-5 rounded to f32), then project
    // 1600 -> 6400 with matrix %166 and bias %167 (all defined above this chunk).
    %float9.999990e-06_1122 = torch.constant.float 9.9999997473787516E-6
    %int1600_1123 = torch.constant.int 1600
    %1797 = torch.prim.ListConstruct %int1600_1123 : (!torch.int) -> !torch.list<int>
    // Only %result0_1124 is consumed in the visible code.
    %result0_1124, %result1_1125, %result2_1126 = torch.aten.native_layer_norm %1796, %1797, %164, %165, %float9.999990e-06_1122 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for torch.aten.mm; %1798 is not referenced in the
    // visible code.
    %1798 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1127 = torch.constant.int 7
    %int1600_1128 = torch.constant.int 1600
    %1799 = torch.prim.ListConstruct %int7_1127, %int1600_1128 : (!torch.int, !torch.int) -> !torch.list<int>
    %1800 = torch.aten.reshape %result0_1124, %1799 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1129 is not referenced in the visible code; %int1_1130 is the alpha
    // of the bias add.
    %int0_1129 = torch.constant.int 0
    %int1_1130 = torch.constant.int 1
    %1801 = torch.aten.mm %1800, %166 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1802 = torch.aten.add.Tensor %1801, %167, %int1_1130 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Restore the leading batch dim: [7,6400] -> [1,7,6400].
    %1803 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1131 = torch.constant.int 1
    %int7_1132 = torch.constant.int 7
    %int6400_1133 = torch.constant.int 6400
    %1804 = torch.prim.ListConstruct %int1_1131, %int7_1132, %int6400_1133 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1805 = torch.aten.reshape %1802, %1804 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %1805, spelled out op by op:
    //   %1818 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh-approximation form of GELU (0.797884583 ~= sqrt(2/pi)).
    // 0.5 * x
    %1806 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1807 = torch.aten.mul.Tensor %1805, %1806 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3, via a tensor-tensor pow with a rank-0 exponent
    %1808 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1809 = torch.aten.pow.Tensor_Tensor %1805, %1808 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %1810 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1811 = torch.aten.mul.Tensor %1809, %1810 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3 (alpha = 1)
    %int1_1134 = torch.constant.int 1
    %1812 = torch.aten.add.Tensor %1805, %1811, %int1_1134 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Scale by 0.797884583 and take tanh
    %1813 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1814 = torch.aten.mul.Tensor %1812, %1813 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1815 = torch.aten.tanh %1814 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by the 0.5 * x term computed first
    %1816 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1135 = torch.constant.int 1
    %1817 = torch.aten.add.Tensor %1815, %1816, %int1_1135 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1818 = torch.aten.mul.Tensor %1807, %1817 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Project the activation %1818 from 6400 back to 1600 features with matrix
    // %168 and bias %169 (defined above this chunk), then add it to %1796.
    // Flatten to [7,6400] for torch.aten.mm; %1819 is not referenced in the
    // visible code.
    %1819 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1136 = torch.constant.int 7
    %int6400_1137 = torch.constant.int 6400
    %1820 = torch.prim.ListConstruct %int7_1136, %int6400_1137 : (!torch.int, !torch.int) -> !torch.list<int>
    %1821 = torch.aten.reshape %1818, %1820 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_1138 is not referenced in the visible code; %int1_1139 is the alpha
    // of the bias add.
    %int0_1138 = torch.constant.int 0
    %int1_1139 = torch.constant.int 1
    %1822 = torch.aten.mm %1821, %168 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1823 = torch.aten.add.Tensor %1822, %169, %int1_1139 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the leading batch dim: [7,1600] -> [1,7,1600].
    %1824 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1140 = torch.constant.int 1
    %int7_1141 = torch.constant.int 7
    %int1600_1142 = torch.constant.int 1600
    %1825 = torch.prim.ListConstruct %int1_1140, %int7_1141, %int1600_1142 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1826 = torch.aten.reshape %1823, %1825 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %1796 (the input that was layer-normalized before this projection chain)
    // plus the projected result, alpha = 1.
    %int1_1143 = torch.constant.int 1
    %1827 = torch.aten.add.Tensor %1796, %1826, %int1_1143 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %1827 over its last dim (size 1600) with weight %170 and
    // bias %171, project 1600 -> 4800 with matrix %172 and bias %173 (all defined
    // above this chunk), and split the result into three [1,7,1600] slices.
    %float9.999990e-06_1144 = torch.constant.float 9.9999997473787516E-6
    %int1600_1145 = torch.constant.int 1600
    %1828 = torch.prim.ListConstruct %int1600_1145 : (!torch.int) -> !torch.list<int>
    // Only %result0_1146 is consumed in the visible code.
    %result0_1146, %result1_1147, %result2_1148 = torch.aten.native_layer_norm %1827, %1828, %170, %171, %float9.999990e-06_1144 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600] for torch.aten.mm; %1829 is not referenced in the
    // visible code.
    %1829 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1149 = torch.constant.int 7
    %int1600_1150 = torch.constant.int 1600
    %1830 = torch.prim.ListConstruct %int7_1149, %int1600_1150 : (!torch.int, !torch.int) -> !torch.list<int>
    %1831 = torch.aten.reshape %result0_1146, %1830 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_1151 is not referenced in the visible code; %int1_1152 is the alpha
    // of the bias add.
    %int0_1151 = torch.constant.int 0
    %int1_1152 = torch.constant.int 1
    %1832 = torch.aten.mm %1831, %172 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1833 = torch.aten.add.Tensor %1832, %173, %int1_1152 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Restore the leading batch dim: [7,4800] -> [1,7,4800].
    %1834 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1153 = torch.constant.int 1
    %int7_1154 = torch.constant.int 7
    %int4800_1155 = torch.constant.int 4800
    %1835 = torch.prim.ListConstruct %int1_1153, %int7_1154, %int4800_1155 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1836 = torch.aten.reshape %1833, %1835 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three elements, each 1600. Converted to an int list, it is
    // the size list for splitting dim 2 (4800 = 3 x 1600).
    %1837 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1838 = torch.prim.tolist(%1837) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1156 = torch.constant.int 2
    %1839 = torch.aten.split_with_sizes %1836, %1838, %int2_1156 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    // Unpack the list into three SSA results %1840#0, %1840#1, %1840#2.
    %1840:3 = torch.prim.ListUnpack %1839 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Give each of the three slices %1840#0/#1/#2 a head axis: reshape
    // [1,7,1600] -> [1,7,25,64], then swap dims 1 and 2 -> [1,25,7,64].
    // (Presumably query, key and value in that order, judging by how the
    // results feed the two matmuls that follow -- confirm against the model.)
    // The [1, 7, 25, 64] tensor literals (%1841, %1845, %1849) are not
    // referenced in the visible code; the reshapes use the int lists instead.
    // Slice #0 -> %1844.
    %1841 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1157 = torch.constant.int 1
    %int7_1158 = torch.constant.int 7
    %int25_1159 = torch.constant.int 25
    %int64_1160 = torch.constant.int 64
    %1842 = torch.prim.ListConstruct %int1_1157, %int7_1158, %int25_1159, %int64_1160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1843 = torch.aten.reshape %1840#0, %1842 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1161 = torch.constant.int 1
    %int2_1162 = torch.constant.int 2
    %1844 = torch.aten.transpose.int %1843, %int1_1161, %int2_1162 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %1848. %1848 is not consumed in the visible code; the function
    // returns many [1,25,7,64] tensors, so it may be one of the outputs --
    // confirm at the return op.
    %1845 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1163 = torch.constant.int 1
    %int7_1164 = torch.constant.int 7
    %int25_1165 = torch.constant.int 25
    %int64_1166 = torch.constant.int 64
    %1846 = torch.prim.ListConstruct %int1_1163, %int7_1164, %int25_1165, %int64_1166 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1847 = torch.aten.reshape %1840#1, %1846 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1167 = torch.constant.int 1
    %int2_1168 = torch.constant.int 2
    %1848 = torch.aten.transpose.int %1847, %int1_1167, %int2_1168 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %1852.
    %1849 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1169 = torch.constant.int 1
    %int7_1170 = torch.constant.int 7
    %int25_1171 = torch.constant.int 25
    %int64_1172 = torch.constant.int 64
    %1850 = torch.prim.ListConstruct %int1_1169, %int7_1170, %int25_1171, %int64_1172 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1851 = torch.aten.reshape %1840#2, %1850 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1173 = torch.constant.int 1
    %int2_1174 = torch.constant.int 2
    %1852 = torch.aten.transpose.int %1851, %int1_1173, %int2_1174 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Build the right-hand operand of the score matmul from %1847 and multiply.
    // This transpose has the same operand and dims as the one producing %1848,
    // but is emitted as a separate op.
    %int1_1175 = torch.constant.int 1
    %int2_1176 = torch.constant.int 2
    %1853 = torch.aten.transpose.int %1847, %int1_1175, %int2_1176 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims: [1,25,7,64] -> [1,25,64,7].
    %int2_1177 = torch.constant.int 2
    %int3_1178 = torch.constant.int 3
    %1854 = torch.aten.transpose.int %1853, %int2_1177, %int3_1178 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul [1,25,7,64] x [1,25,64,7] -> [1,25,7,7] raw scores.
    %1855 = torch.aten.matmul %1844, %1854 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale the [1,25,7,7] scores in %1855 by dividing by (0.0 + 64.0^0.5).
    // The divisor is built from rank-0 tensor ops rather than a folded constant.
    %1856 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1857 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1858 = torch.aten.pow.Tensor_Tensor %1856, %1857 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %1859 and %int0_1179 are not referenced in the visible part of the function.
    %1859 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1179 = torch.constant.int 0
    // Empty size list -> rank-0 tensor filled with 0.0.
    %1860 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1180 = torch.constant.none
    %float0.000000e00_1181 = torch.constant.float 0.000000e+00
    %1861 = torch.aten.full %1860, %float0.000000e00_1181, %none_1180, %none_1180, %none_1180, %none_1180 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    // 0.0 + 1 * pow-result: numerically the same value as %1858.
    %int1_1182 = torch.constant.int 1
    %1862 = torch.aten.add.Tensor %1861, %1858, %int1_1182 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // The rank-0 divisor broadcasts over the full [1,25,7,7] score tensor.
    %1863 = torch.aten.div.Tensor %1855, %1862 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores %1863, softmax them, and multiply by %1852.
    // %1864 is a boolean [1,1,7,7] literal whose contents are elided in this
    // dump (presumably a causal mask -- confirm against the un-elided model).
    %1864 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the
    // element type does not change.
    %int6_1183 = torch.constant.int 6
    %none_1184 = torch.constant.none
    %false_1185 = torch.constant.bool false
    %1865 = torch.aten.to.dtype %1863, %int6_1183, %false_1185, %false_1185, %none_1184 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32 value).
    %1866 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1867 = torch.aten.where.self %1864, %1865, %1866 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis); the dtype argument is none.
    %int3_1186 = torch.constant.int 3
    %none_1187 = torch.constant.none
    %1868 = torch.aten.softmax.int %1867, %int3_1186, %none_1187 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype, again leaving the element type unchanged.
    %int6_1188 = torch.constant.int 6
    %none_1189 = torch.constant.none
    %false_1190 = torch.constant.bool false
    %1869 = torch.aten.to.dtype %1868, %int6_1188, %false_1190, %false_1190, %none_1189 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weights [1,25,7,7] x %1852 [1,25,7,64] -> per-head output [1,25,7,64].
    %1870 = torch.aten.matmul %1869, %1852 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1191 = torch.constant.int 1
    %int2_1192 = torch.constant.int 2
    %1871 = torch.aten.transpose.int %1870, %int1_1191, %int2_1192 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1872 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1193 = torch.constant.int 1
    %int7_1194 = torch.constant.int 7
    %int1600_1195 = torch.constant.int 1600
    %1873 = torch.prim.ListConstruct %int1_1193, %int7_1194, %int1600_1195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1874 = torch.aten.reshape %1871, %1873 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %1875 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1196 = torch.constant.int 7
    %int1600_1197 = torch.constant.int 1600
    %1876 = torch.prim.ListConstruct %int7_1196, %int1600_1197 : (!torch.int, !torch.int) -> !torch.list<int>
    %1877 = torch.aten.reshape %1874, %1876 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1198 = torch.constant.int 0
    %int1_1199 = torch.constant.int 1
    %1878 = torch.aten.mm %1877, %174 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1879 = torch.aten.add.Tensor %1878, %175, %int1_1199 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1880 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1200 = torch.constant.int 1
    %int7_1201 = torch.constant.int 7
    %int1600_1202 = torch.constant.int 1600
    %1881 = torch.prim.ListConstruct %int1_1200, %int7_1201, %int1600_1202 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1882 = torch.aten.reshape %1879, %1881 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1203 = torch.constant.int 1
    %1883 = torch.aten.add.Tensor %1882, %1827, %int1_1203 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Feed-forward sub-block: input %1883, output %1914 ----
    // Layer norm of %1883 over the last dimension (size 1600, eps ~1e-5).
    // %176 / %177 fill the weight / bias operand slots; both are defined earlier in the function.
    %float9.999990e-06_1204 = torch.constant.float 9.9999997473787516E-6
    %int1600_1205 = torch.constant.int 1600
    %1884 = torch.prim.ListConstruct %int1600_1205 : (!torch.int) -> !torch.list<int>
    %result0_1206, %result1_1207, %result2_1208 = torch.aten.native_layer_norm %1883, %1884, %176, %177, %float9.999990e-06_1204 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %1886 instead.
    %1885 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1209 = torch.constant.int 7
    %int1600_1210 = torch.constant.int 1600
    %1886 = torch.prim.ListConstruct %int7_1209, %int1600_1210 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] so the projection can be a 2-D mm.
    %1887 = torch.aten.reshape %result0_1206, %1886 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1211 = torch.constant.int 0
    %int1_1212 = torch.constant.int 1
    // Expand 1600 -> 6400: mm with %178, then add %179 (alpha = 1).
    %1888 = torch.aten.mm %1887, %178 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1889 = torch.aten.add.Tensor %1888, %179, %int1_1212 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1890 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1213 = torch.constant.int 1
    %int7_1214 = torch.constant.int 7
    %int6400_1215 = torch.constant.int 6400
    %1891 = torch.prim.ListConstruct %int1_1213, %int7_1214, %int6400_1215 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading batch dimension: x = %1892, shape [1,7,6400].
    %1892 = torch.aten.reshape %1889, %1891 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation, spelled out op by op:
    //   %1905 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // (this is the tanh-based GELU approximation formula).
    %1893 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1894 = torch.aten.mul.Tensor %1892, %1893 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1895 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1896 = torch.aten.pow.Tensor_Tensor %1892, %1895 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1897 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1898 = torch.aten.mul.Tensor %1896, %1897 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1216 = torch.constant.int 1
    %1899 = torch.aten.add.Tensor %1892, %1898, %int1_1216 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1900 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1901 = torch.aten.mul.Tensor %1899, %1900 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1902 = torch.aten.tanh %1901 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1903 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1217 = torch.constant.int 1
    %1904 = torch.aten.add.Tensor %1902, %1903, %int1_1217 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1905 = torch.aten.mul.Tensor %1894, %1904 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1906 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1218 = torch.constant.int 7
    %int6400_1219 = torch.constant.int 6400
    %1907 = torch.prim.ListConstruct %int7_1218, %int6400_1219 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten again and project 6400 -> 1600: mm with %180, then add %181 (alpha = 1).
    %1908 = torch.aten.reshape %1905, %1907 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1220 = torch.constant.int 0
    %int1_1221 = torch.constant.int 1
    %1909 = torch.aten.mm %1908, %180 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1910 = torch.aten.add.Tensor %1909, %181, %int1_1221 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1911 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1222 = torch.constant.int 1
    %int7_1223 = torch.constant.int 7
    %int1600_1224 = torch.constant.int 1600
    %1912 = torch.prim.ListConstruct %int1_1222, %int7_1223, %int1600_1224 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1913 = torch.aten.reshape %1910, %1912 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1225 = torch.constant.int 1
    // Residual connection: add the sub-block input %1883 to the projected result.
    %1914 = torch.aten.add.Tensor %1883, %1913, %int1_1225 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Attention sub-block: input %1914, output %1970 ----
    // Layer norm of %1914 over the last dimension (size 1600, eps ~1e-5).
    // %182 / %183 fill the weight / bias operand slots; both are defined earlier in the function.
    %float9.999990e-06_1226 = torch.constant.float 9.9999997473787516E-6
    %int1600_1227 = torch.constant.int 1600
    %1915 = torch.prim.ListConstruct %int1600_1227 : (!torch.int) -> !torch.list<int>
    %result0_1228, %result1_1229, %result2_1230 = torch.aten.native_layer_norm %1914, %1915, %182, %183, %float9.999990e-06_1226 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %1916 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1231 = torch.constant.int 7
    %int1600_1232 = torch.constant.int 1600
    %1917 = torch.prim.ListConstruct %int7_1231, %int1600_1232 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600], then one fused projection 1600 -> 4800: mm with %184, add %185 (alpha = 1).
    %1918 = torch.aten.reshape %result0_1228, %1917 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1233 = torch.constant.int 0
    %int1_1234 = torch.constant.int 1
    %1919 = torch.aten.mm %1918, %184 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %1920 = torch.aten.add.Tensor %1919, %185, %int1_1234 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %1921 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1235 = torch.constant.int 1
    %int7_1236 = torch.constant.int 7
    %int4800_1237 = torch.constant.int 4800
    %1922 = torch.prim.ListConstruct %int1_1235, %int7_1236, %int4800_1237 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1923 = torch.aten.reshape %1920, %1922 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split the 4800-wide result along dim 2 into three [1,7,1600] chunks
    // (%1924 is a splat, i.e. split sizes [1600, 1600, 1600]).
    %1924 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %1925 = torch.prim.tolist(%1924) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1238 = torch.constant.int 2
    %1926 = torch.aten.split_with_sizes %1923, %1925, %int2_1238 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %1927:3 = torch.prim.ListUnpack %1926 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Chunk #0 -> [1,7,25,64] -> swap dims 1,2 -> %1931 [1,25,7,64].
    // Used as the left operand of the score matmul below (the query role).
    %1928 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1239 = torch.constant.int 1
    %int7_1240 = torch.constant.int 7
    %int25_1241 = torch.constant.int 25
    %int64_1242 = torch.constant.int 64
    %1929 = torch.prim.ListConstruct %int1_1239, %int7_1240, %int25_1241, %int64_1242 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1930 = torch.aten.reshape %1927#0, %1929 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1243 = torch.constant.int 1
    %int2_1244 = torch.constant.int 2
    %1931 = torch.aten.transpose.int %1930, %int1_1243, %int2_1244 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> [1,7,25,64] -> swap dims 1,2 -> %1935 [1,25,7,64] (the key role).
    // NOTE(review): %1935 is not referenced again in this part of the file; it is
    // presumably consumed later (e.g. as one of the function's [1,25,7,64] results) - confirm.
    %1932 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1245 = torch.constant.int 1
    %int7_1246 = torch.constant.int 7
    %int25_1247 = torch.constant.int 25
    %int64_1248 = torch.constant.int 64
    %1933 = torch.prim.ListConstruct %int1_1245, %int7_1246, %int25_1247, %int64_1248 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1934 = torch.aten.reshape %1927#1, %1933 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1249 = torch.constant.int 1
    %int2_1250 = torch.constant.int 2
    %1935 = torch.aten.transpose.int %1934, %int1_1249, %int2_1250 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> [1,7,25,64] -> swap dims 1,2 -> %1939 [1,25,7,64].
    // Multiplied by the softmax output further down (the value role).
    %1936 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1251 = torch.constant.int 1
    %int7_1252 = torch.constant.int 7
    %int25_1253 = torch.constant.int 25
    %int64_1254 = torch.constant.int 64
    %1937 = torch.prim.ListConstruct %int1_1251, %int7_1252, %int25_1253, %int64_1254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1938 = torch.aten.reshape %1927#2, %1937 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1255 = torch.constant.int 1
    %int2_1256 = torch.constant.int 2
    %1939 = torch.aten.transpose.int %1938, %int1_1255, %int2_1256 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // %1940 repeats the same transpose of %1934 that produced %1935; it is then
    // transposed on dims 2,3 to [1,25,64,7] for the score matmul.
    %int1_1257 = torch.constant.int 1
    %int2_1258 = torch.constant.int 2
    %1940 = torch.aten.transpose.int %1934, %int1_1257, %int2_1258 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1259 = torch.constant.int 2
    %int3_1260 = torch.constant.int 3
    %1941 = torch.aten.transpose.int %1940, %int2_1259, %int3_1260 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %1942 = torch.aten.matmul %1931, %1941 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor 64^0.5, computed as a scalar tensor.
    %1943 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1944 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1945 = torch.aten.pow.Tensor_Tensor %1943, %1944 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %1946 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1261 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0, added to the scale: %1949 = 0 + %1945.
    %1947 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1262 = torch.constant.none
    %float0.000000e00_1263 = torch.constant.float 0.000000e+00
    %1948 = torch.aten.full %1947, %float0.000000e00_1263, %none_1262, %none_1262, %none_1262, %none_1262 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1264 = torch.constant.int 1
    %1949 = torch.aten.add.Tensor %1948, %1945, %int1_1264 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores = raw scores / %1949.
    %1950 = torch.aten.div.Tensor %1942, %1949 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean mask [1,1,7,7]; its contents are elided in this dump
    // (presumably a causal mask - confirm against the un-elided model).
    %1951 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6: operand and result are both f32, so the element type is unchanged.
    %int6_1265 = torch.constant.int 6
    %none_1266 = torch.constant.none
    %false_1267 = torch.constant.bool false
    %1952 = torch.aten.to.dtype %1950, %int6_1265, %false_1267, %false_1267, %none_1266 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute -3.40282347E+38.
    %1953 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %1954 = torch.aten.where.self %1951, %1952, %1953 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension of the [1,25,7,7] scores).
    %int3_1268 = torch.constant.int 3
    %none_1269 = torch.constant.none
    %1955 = torch.aten.softmax.int %1954, %int3_1268, %none_1269 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; again the element type is unchanged.
    %int6_1270 = torch.constant.int 6
    %none_1271 = torch.constant.none
    %false_1272 = torch.constant.bool false
    %1956 = torch.aten.to.dtype %1955, %int6_1270, %false_1272, %false_1272, %none_1271 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x %1939 [1,25,7,64] -> [1,25,7,64].
    %1957 = torch.aten.matmul %1956, %1939 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the head layout: swap dims 1,2 back, then merge 25 x 64 -> 1600.
    %int1_1273 = torch.constant.int 1
    %int2_1274 = torch.constant.int 2
    %1958 = torch.aten.transpose.int %1957, %int1_1273, %int2_1274 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %1959 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1275 = torch.constant.int 1
    %int7_1276 = torch.constant.int 7
    %int1600_1277 = torch.constant.int 1600
    %1960 = torch.prim.ListConstruct %int1_1275, %int7_1276, %int1600_1277 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1961 = torch.aten.reshape %1958, %1960 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %1962 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1278 = torch.constant.int 7
    %int1600_1279 = torch.constant.int 1600
    %1963 = torch.prim.ListConstruct %int7_1278, %int1600_1279 : (!torch.int, !torch.int) -> !torch.list<int>
    // Output projection 1600 -> 1600: flatten, mm with %186, add %187 (alpha = 1), reshape back.
    %1964 = torch.aten.reshape %1961, %1963 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1280 = torch.constant.int 0
    %int1_1281 = torch.constant.int 1
    %1965 = torch.aten.mm %1964, %186 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1966 = torch.aten.add.Tensor %1965, %187, %int1_1281 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1967 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1282 = torch.constant.int 1
    %int7_1283 = torch.constant.int 7
    %int1600_1284 = torch.constant.int 1600
    %1968 = torch.prim.ListConstruct %int1_1282, %int7_1283, %int1600_1284 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %1969 = torch.aten.reshape %1966, %1968 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1285 = torch.constant.int 1
    // Residual connection: add the sub-block input %1914 to the projected result.
    %1970 = torch.aten.add.Tensor %1969, %1914, %int1_1285 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Feed-forward sub-block: input %1970, output %2001 ----
    // Layer norm of %1970 over the last dimension (size 1600, eps ~1e-5).
    // %188 / %189 fill the weight / bias operand slots; both are defined earlier in the function.
    %float9.999990e-06_1286 = torch.constant.float 9.9999997473787516E-6
    %int1600_1287 = torch.constant.int 1600
    %1971 = torch.prim.ListConstruct %int1600_1287 : (!torch.int) -> !torch.list<int>
    %result0_1288, %result1_1289, %result2_1290 = torch.aten.native_layer_norm %1970, %1971, %188, %189, %float9.999990e-06_1286 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %1973 instead.
    %1972 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1291 = torch.constant.int 7
    %int1600_1292 = torch.constant.int 1600
    %1973 = torch.prim.ListConstruct %int7_1291, %int1600_1292 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] so the projection can be a 2-D mm.
    %1974 = torch.aten.reshape %result0_1288, %1973 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1293 = torch.constant.int 0
    %int1_1294 = torch.constant.int 1
    // Expand 1600 -> 6400: mm with %190, then add %191 (alpha = 1).
    %1975 = torch.aten.mm %1974, %190 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %1976 = torch.aten.add.Tensor %1975, %191, %int1_1294 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %1977 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1295 = torch.constant.int 1
    %int7_1296 = torch.constant.int 7
    %int6400_1297 = torch.constant.int 6400
    %1978 = torch.prim.ListConstruct %int1_1295, %int7_1296, %int6400_1297 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading batch dimension: x = %1979, shape [1,7,6400].
    %1979 = torch.aten.reshape %1976, %1978 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation, spelled out op by op:
    //   %1992 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // (this is the tanh-based GELU approximation formula).
    %1980 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %1981 = torch.aten.mul.Tensor %1979, %1980 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1982 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %1983 = torch.aten.pow.Tensor_Tensor %1979, %1982 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1984 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %1985 = torch.aten.mul.Tensor %1983, %1984 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1298 = torch.constant.int 1
    %1986 = torch.aten.add.Tensor %1979, %1985, %int1_1298 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1987 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %1988 = torch.aten.mul.Tensor %1986, %1987 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1989 = torch.aten.tanh %1988 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1990 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1299 = torch.constant.int 1
    %1991 = torch.aten.add.Tensor %1989, %1990, %int1_1299 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %1992 = torch.aten.mul.Tensor %1981, %1991 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %1993 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1300 = torch.constant.int 7
    %int6400_1301 = torch.constant.int 6400
    %1994 = torch.prim.ListConstruct %int7_1300, %int6400_1301 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten again and project 6400 -> 1600: mm with %192, then add %193 (alpha = 1).
    %1995 = torch.aten.reshape %1992, %1994 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1302 = torch.constant.int 0
    %int1_1303 = torch.constant.int 1
    %1996 = torch.aten.mm %1995, %192 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %1997 = torch.aten.add.Tensor %1996, %193, %int1_1303 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %1998 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1304 = torch.constant.int 1
    %int7_1305 = torch.constant.int 7
    %int1600_1306 = torch.constant.int 1600
    %1999 = torch.prim.ListConstruct %int1_1304, %int7_1305, %int1600_1306 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2000 = torch.aten.reshape %1997, %1999 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1307 = torch.constant.int 1
    // Residual connection: add the sub-block input %1970 to the projected result.
    %2001 = torch.aten.add.Tensor %1970, %2000, %int1_1307 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Attention sub-block: input %2001, output %2057 ----
    // Layer norm of %2001 over the last dimension (size 1600, eps ~1e-5).
    // %194 / %195 fill the weight / bias operand slots; both are defined earlier in the function.
    %float9.999990e-06_1308 = torch.constant.float 9.9999997473787516E-6
    %int1600_1309 = torch.constant.int 1600
    %2002 = torch.prim.ListConstruct %int1600_1309 : (!torch.int) -> !torch.list<int>
    %result0_1310, %result1_1311, %result2_1312 = torch.aten.native_layer_norm %2001, %2002, %194, %195, %float9.999990e-06_1308 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2003 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1313 = torch.constant.int 7
    %int1600_1314 = torch.constant.int 1600
    %2004 = torch.prim.ListConstruct %int7_1313, %int1600_1314 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600], then one fused projection 1600 -> 4800: mm with %196, add %197 (alpha = 1).
    %2005 = torch.aten.reshape %result0_1310, %2004 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1315 = torch.constant.int 0
    %int1_1316 = torch.constant.int 1
    %2006 = torch.aten.mm %2005, %196 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2007 = torch.aten.add.Tensor %2006, %197, %int1_1316 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2008 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1317 = torch.constant.int 1
    %int7_1318 = torch.constant.int 7
    %int4800_1319 = torch.constant.int 4800
    %2009 = torch.prim.ListConstruct %int1_1317, %int7_1318, %int4800_1319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2010 = torch.aten.reshape %2007, %2009 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split the 4800-wide result along dim 2 into three [1,7,1600] chunks
    // (%2011 is a splat, i.e. split sizes [1600, 1600, 1600]).
    %2011 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2012 = torch.prim.tolist(%2011) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1320 = torch.constant.int 2
    %2013 = torch.aten.split_with_sizes %2010, %2012, %int2_1320 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2014:3 = torch.prim.ListUnpack %2013 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Chunk #0 -> [1,7,25,64] -> swap dims 1,2 -> %2018 [1,25,7,64].
    // Used as the left operand of the score matmul below (the query role).
    %2015 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1321 = torch.constant.int 1
    %int7_1322 = torch.constant.int 7
    %int25_1323 = torch.constant.int 25
    %int64_1324 = torch.constant.int 64
    %2016 = torch.prim.ListConstruct %int1_1321, %int7_1322, %int25_1323, %int64_1324 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2017 = torch.aten.reshape %2014#0, %2016 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1325 = torch.constant.int 1
    %int2_1326 = torch.constant.int 2
    %2018 = torch.aten.transpose.int %2017, %int1_1325, %int2_1326 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> [1,7,25,64] -> swap dims 1,2 -> %2022 [1,25,7,64] (the key role).
    // NOTE(review): %2022 is not referenced again in this part of the file; it is
    // presumably consumed later (e.g. as one of the function's [1,25,7,64] results) - confirm.
    %2019 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1327 = torch.constant.int 1
    %int7_1328 = torch.constant.int 7
    %int25_1329 = torch.constant.int 25
    %int64_1330 = torch.constant.int 64
    %2020 = torch.prim.ListConstruct %int1_1327, %int7_1328, %int25_1329, %int64_1330 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2021 = torch.aten.reshape %2014#1, %2020 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1331 = torch.constant.int 1
    %int2_1332 = torch.constant.int 2
    %2022 = torch.aten.transpose.int %2021, %int1_1331, %int2_1332 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> [1,7,25,64] -> swap dims 1,2 -> %2026 [1,25,7,64].
    // Multiplied by the softmax output further down (the value role).
    %2023 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1333 = torch.constant.int 1
    %int7_1334 = torch.constant.int 7
    %int25_1335 = torch.constant.int 25
    %int64_1336 = torch.constant.int 64
    %2024 = torch.prim.ListConstruct %int1_1333, %int7_1334, %int25_1335, %int64_1336 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2025 = torch.aten.reshape %2014#2, %2024 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1337 = torch.constant.int 1
    %int2_1338 = torch.constant.int 2
    %2026 = torch.aten.transpose.int %2025, %int1_1337, %int2_1338 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // %2027 repeats the same transpose of %2021 that produced %2022; it is then
    // transposed on dims 2,3 to [1,25,64,7] for the score matmul.
    %int1_1339 = torch.constant.int 1
    %int2_1340 = torch.constant.int 2
    %2027 = torch.aten.transpose.int %2021, %int1_1339, %int2_1340 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1341 = torch.constant.int 2
    %int3_1342 = torch.constant.int 3
    %2028 = torch.aten.transpose.int %2027, %int2_1341, %int3_1342 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %2029 = torch.aten.matmul %2018, %2028 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor 64^0.5, computed as a scalar tensor.
    %2030 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2031 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2032 = torch.aten.pow.Tensor_Tensor %2030, %2031 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2033 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1343 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0, added to the scale: %2036 = 0 + %2032.
    %2034 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1344 = torch.constant.none
    %float0.000000e00_1345 = torch.constant.float 0.000000e+00
    %2035 = torch.aten.full %2034, %float0.000000e00_1345, %none_1344, %none_1344, %none_1344, %none_1344 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1346 = torch.constant.int 1
    %2036 = torch.aten.add.Tensor %2035, %2032, %int1_1346 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores = raw scores / %2036.
    %2037 = torch.aten.div.Tensor %2029, %2036 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean mask [1,1,7,7]; its contents are elided in this dump
    // (presumably a causal mask - confirm against the un-elided model).
    %2038 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6: operand and result are both f32, so the element type is unchanged.
    %int6_1347 = torch.constant.int 6
    %none_1348 = torch.constant.none
    %false_1349 = torch.constant.bool false
    %2039 = torch.aten.to.dtype %2037, %int6_1347, %false_1349, %false_1349, %none_1348 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute -3.40282347E+38.
    %2040 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2041 = torch.aten.where.self %2038, %2039, %2040 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension of the [1,25,7,7] scores).
    %int3_1350 = torch.constant.int 3
    %none_1351 = torch.constant.none
    %2042 = torch.aten.softmax.int %2041, %int3_1350, %none_1351 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; again the element type is unchanged.
    %int6_1352 = torch.constant.int 6
    %none_1353 = torch.constant.none
    %false_1354 = torch.constant.bool false
    %2043 = torch.aten.to.dtype %2042, %int6_1352, %false_1354, %false_1354, %none_1353 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x %2026 [1,25,7,64] -> [1,25,7,64].
    %2044 = torch.aten.matmul %2043, %2026 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the head layout: swap dims 1,2 back, then merge 25 x 64 -> 1600.
    %int1_1355 = torch.constant.int 1
    %int2_1356 = torch.constant.int 2
    %2045 = torch.aten.transpose.int %2044, %int1_1355, %int2_1356 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2046 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1357 = torch.constant.int 1
    %int7_1358 = torch.constant.int 7
    %int1600_1359 = torch.constant.int 1600
    %2047 = torch.prim.ListConstruct %int1_1357, %int7_1358, %int1600_1359 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2048 = torch.aten.reshape %2045, %2047 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2049 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1360 = torch.constant.int 7
    %int1600_1361 = torch.constant.int 1600
    %2050 = torch.prim.ListConstruct %int7_1360, %int1600_1361 : (!torch.int, !torch.int) -> !torch.list<int>
    // Output projection 1600 -> 1600: flatten, mm with %198, add %199 (alpha = 1), reshape back.
    %2051 = torch.aten.reshape %2048, %2050 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1362 = torch.constant.int 0
    %int1_1363 = torch.constant.int 1
    %2052 = torch.aten.mm %2051, %198 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2053 = torch.aten.add.Tensor %2052, %199, %int1_1363 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2054 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1364 = torch.constant.int 1
    %int7_1365 = torch.constant.int 7
    %int1600_1366 = torch.constant.int 1600
    %2055 = torch.prim.ListConstruct %int1_1364, %int7_1365, %int1600_1366 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2056 = torch.aten.reshape %2053, %2055 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1367 = torch.constant.int 1
    // Residual connection: add the sub-block input %2001 to the projected result.
    %2057 = torch.aten.add.Tensor %2056, %2001, %int1_1367 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Feed-forward section: layer norm -> 1600->6400 affine -> tanh-based
    // activation -> 6400->1600 affine -> add back the section input %2057.
    //
    // Layer norm over the trailing 1600-wide dim with %200 / %201 as the two
    // [1600] parameter vectors and eps 9.9999997473787516E-6. Only result0 is
    // consumed in this excerpt; the two [1,7,1] results are not.
    %float9.999990e-06_1368 = torch.constant.float 9.9999997473787516E-6
    %int1600_1369 = torch.constant.int 1600
    %2058 = torch.prim.ListConstruct %int1600_1369 : (!torch.int) -> !torch.list<int>
    %result0_1370, %result1_1371, %result2_1372 = torch.aten.native_layer_norm %2057, %2058, %200, %201, %float9.999990e-06_1368 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): the shape literals (%2059, %2064, %2080, %2085) and the
    // %int0_* constants in this section have no use inside this excerpt.
    %2059 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1373 = torch.constant.int 7
    %int1600_1374 = torch.constant.int 1600
    %2060 = torch.prim.ListConstruct %int7_1373, %int1600_1374 : (!torch.int, !torch.int) -> !torch.list<int>
    %2061 = torch.aten.reshape %result0_1370, %2060 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1375 = torch.constant.int 0
    %int1_1376 = torch.constant.int 1
    // Expand 1600 -> 6400: mm with %202 [1600,6400] plus the [6400] vector %203.
    %2062 = torch.aten.mm %2061, %202 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2063 = torch.aten.add.Tensor %2062, %203, %int1_1376 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2064 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1377 = torch.constant.int 1
    %int7_1378 = torch.constant.int 7
    %int6400_1379 = torch.constant.int 6400
    %2065 = torch.prim.ListConstruct %int1_1377, %int7_1378, %int6400_1379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2066 = torch.aten.reshape %2063, %2065 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Activation, with x = %2066, spelled out op by op:
    //   %2079 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the arithmetic of the tanh-approximation GELU.
    %2067 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2068 = torch.aten.mul.Tensor %2066, %2067 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2069 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2070 = torch.aten.pow.Tensor_Tensor %2066, %2069 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2071 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2072 = torch.aten.mul.Tensor %2070, %2071 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1380 = torch.constant.int 1
    %2073 = torch.aten.add.Tensor %2066, %2072, %int1_1380 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2074 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2075 = torch.aten.mul.Tensor %2073, %2074 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2076 = torch.aten.tanh %2075 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2077 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1381 = torch.constant.int 1
    %2078 = torch.aten.add.Tensor %2076, %2077, %int1_1381 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2079 = torch.aten.mul.Tensor %2068, %2078 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Contract 6400 -> 1600: flatten to [7,6400], mm with %204 [6400,1600], add
    // the [1600] vector %205, then restore the [1,7,1600] shape.
    %2080 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1382 = torch.constant.int 7
    %int6400_1383 = torch.constant.int 6400
    %2081 = torch.prim.ListConstruct %int7_1382, %int6400_1383 : (!torch.int, !torch.int) -> !torch.list<int>
    %2082 = torch.aten.reshape %2079, %2081 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1384 = torch.constant.int 0
    %int1_1385 = torch.constant.int 1
    %2083 = torch.aten.mm %2082, %204 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2084 = torch.aten.add.Tensor %2083, %205, %int1_1385 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2085 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1386 = torch.constant.int 1
    %int7_1387 = torch.constant.int 7
    %int1600_1388 = torch.constant.int 1600
    %2086 = torch.prim.ListConstruct %int1_1386, %int7_1387, %int1600_1388 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2087 = torch.aten.reshape %2084, %2086 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1389 = torch.constant.int 1
    // Skip connection: the pre-norm input %2057 plus the feed-forward output.
    %2088 = torch.aten.add.Tensor %2057, %2087, %int1_1389 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Attention section: layer norm -> fused 1600->4800 affine -> split into three
    // 1600-wide slices -> 25 heads of width 64 -> scaled, masked softmax
    // attention -> merge heads -> 1600->1600 affine -> add back the input %2088.
    //
    // Layer norm over the trailing 1600-wide dim with %206 / %207 as the two
    // [1600] parameter vectors. Only result0 is consumed in this excerpt.
    %float9.999990e-06_1390 = torch.constant.float 9.9999997473787516E-6
    %int1600_1391 = torch.constant.int 1600
    %2089 = torch.prim.ListConstruct %int1600_1391 : (!torch.int) -> !torch.list<int>
    %result0_1392, %result1_1393, %result2_1394 = torch.aten.native_layer_norm %2088, %2089, %206, %207, %float9.999990e-06_1390 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): the shape literals (%2090, %2095, %2102, %2106, %2110, %2120,
    // %2133, %2136, %2141) and the %int0_* constants in this section have no use
    // inside this excerpt.
    %2090 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1395 = torch.constant.int 7
    %int1600_1396 = torch.constant.int 1600
    %2091 = torch.prim.ListConstruct %int7_1395, %int1600_1396 : (!torch.int, !torch.int) -> !torch.list<int>
    %2092 = torch.aten.reshape %result0_1392, %2091 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1397 = torch.constant.int 0
    %int1_1398 = torch.constant.int 1
    // Fused projection: mm with %208 [1600,4800] plus the [4800] vector %209.
    %2093 = torch.aten.mm %2092, %208 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2094 = torch.aten.add.Tensor %2093, %209, %int1_1398 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2095 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1399 = torch.constant.int 1
    %int7_1400 = torch.constant.int 7
    %int4800_1401 = torch.constant.int 4800
    %2096 = torch.prim.ListConstruct %int1_1399, %int7_1400, %int4800_1401 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2097 = torch.aten.reshape %2094, %2096 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split dim 2 into three pieces; the sizes come from the splat literal
    // dense<1600> : tensor<3xsi64>, i.e. three entries of 1600.
    %2098 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2099 = torch.prim.tolist(%2098) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1402 = torch.constant.int 2
    %2100 = torch.aten.split_with_sizes %2097, %2099, %int2_1402 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2101:3 = torch.prim.ListUnpack %2100 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0 -> [1,7,25,64] -> [1,25,7,64]. %2105 is the left operand of the
    // score matmul below (query role — an interpretation from its use).
    %2102 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1403 = torch.constant.int 1
    %int7_1404 = torch.constant.int 7
    %int25_1405 = torch.constant.int 25
    %int64_1406 = torch.constant.int 64
    %2103 = torch.prim.ListConstruct %int1_1403, %int7_1404, %int25_1405, %int64_1406 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2104 = torch.aten.reshape %2101#0, %2103 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1407 = torch.constant.int 1
    %int2_1408 = torch.constant.int 2
    %2105 = torch.aten.transpose.int %2104, %int1_1407, %int2_1408 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> [1,7,25,64] -> [1,25,7,64].
    // NOTE(review): %2109 is not consumed inside this excerpt; presumably it is
    // one of the function's [1,25,7,64] results — confirm at the return.
    %2106 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1409 = torch.constant.int 1
    %int7_1410 = torch.constant.int 7
    %int25_1411 = torch.constant.int 25
    %int64_1412 = torch.constant.int 64
    %2107 = torch.prim.ListConstruct %int1_1409, %int7_1410, %int25_1411, %int64_1412 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2108 = torch.aten.reshape %2101#1, %2107 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1413 = torch.constant.int 1
    %int2_1414 = torch.constant.int 2
    %2109 = torch.aten.transpose.int %2108, %int1_1413, %int2_1414 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> [1,7,25,64] -> [1,25,7,64]. %2113 is the right operand of the
    // second matmul below (value role — an interpretation from its use).
    %2110 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1415 = torch.constant.int 1
    %int7_1416 = torch.constant.int 7
    %int25_1417 = torch.constant.int 25
    %int64_1418 = torch.constant.int 64
    %2111 = torch.prim.ListConstruct %int1_1415, %int7_1416, %int25_1417, %int64_1418 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2112 = torch.aten.reshape %2101#2, %2111 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1419 = torch.constant.int 1
    %int2_1420 = torch.constant.int 2
    %2113 = torch.aten.transpose.int %2112, %int1_1419, %int2_1420 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Repeat the (1,2) transpose of %2108 (same operands as %2109), then swap the
    // last two dims to get [1,25,64,7] for the score matmul.
    %int1_1421 = torch.constant.int 1
    %int2_1422 = torch.constant.int 2
    %2114 = torch.aten.transpose.int %2108, %int1_1421, %int2_1422 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1423 = torch.constant.int 2
    %int3_1424 = torch.constant.int 3
    %2115 = torch.aten.transpose.int %2114, %int2_1423, %int3_1424 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %2116 = torch.aten.matmul %2105, %2115 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor built at run time: %2119 = 64 ^ 0.5; %2122 is a rank-0 tensor
    // filled with 0.0; %2123 = %2122 + %2119. The scores are divided by %2123.
    %2117 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2118 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2119 = torch.aten.pow.Tensor_Tensor %2117, %2118 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2120 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1425 = torch.constant.int 0
    %2121 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1426 = torch.constant.none
    %float0.000000e00_1427 = torch.constant.float 0.000000e+00
    %2122 = torch.aten.full %2121, %float0.000000e00_1427, %none_1426, %none_1426, %none_1426, %none_1426 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1428 = torch.constant.int 1
    %2123 = torch.aten.add.Tensor %2122, %2119, %int1_1428 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2124 = torch.aten.div.Tensor %2116, %2123 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask — cannot be
    // confirmed from the elided resource.
    %2125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result types are both f32 here.
    %int6_1429 = torch.constant.int 6
    %none_1430 = torch.constant.none
    %false_1431 = torch.constant.bool false
    %2126 = torch.aten.to.dtype %2124, %int6_1429, %false_1431, %false_1431, %none_1430 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise substitute
    // -3.40282347E+38, so masked positions get negligible softmax weight.
    %2127 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2128 = torch.aten.where.self %2125, %2126, %2127 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last dim of the [1,25,7,7] scores).
    %int3_1432 = torch.constant.int 3
    %none_1433 = torch.constant.none
    %2129 = torch.aten.softmax.int %2128, %int3_1432, %none_1433 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_1434 = torch.constant.int 6
    %none_1435 = torch.constant.none
    %false_1436 = torch.constant.bool false
    %2130 = torch.aten.to.dtype %2129, %int6_1434, %false_1436, %false_1436, %none_1435 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %2131 = torch.aten.matmul %2130, %2113 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge heads: transpose to [1,7,25,64], reshape to [1,7,1600], then flatten
    // to [7,1600] for the 2-D projection.
    %int1_1437 = torch.constant.int 1
    %int2_1438 = torch.constant.int 2
    %2132 = torch.aten.transpose.int %2131, %int1_1437, %int2_1438 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2133 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1439 = torch.constant.int 1
    %int7_1440 = torch.constant.int 7
    %int1600_1441 = torch.constant.int 1600
    %2134 = torch.prim.ListConstruct %int1_1439, %int7_1440, %int1600_1441 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2135 = torch.aten.reshape %2132, %2134 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2136 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1442 = torch.constant.int 7
    %int1600_1443 = torch.constant.int 1600
    %2137 = torch.prim.ListConstruct %int7_1442, %int1600_1443 : (!torch.int, !torch.int) -> !torch.list<int>
    %2138 = torch.aten.reshape %2135, %2137 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1444 = torch.constant.int 0
    %int1_1445 = torch.constant.int 1
    // Output projection: mm with %210 [1600,1600] plus the [1600] vector %211.
    %2139 = torch.aten.mm %2138, %210 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2140 = torch.aten.add.Tensor %2139, %211, %int1_1445 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2141 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1446 = torch.constant.int 1
    %int7_1447 = torch.constant.int 7
    %int1600_1448 = torch.constant.int 1600
    %2142 = torch.prim.ListConstruct %int1_1446, %int7_1447, %int1600_1448 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2143 = torch.aten.reshape %2140, %2142 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1449 = torch.constant.int 1
    // Skip connection: attention output plus the pre-norm input %2088.
    %2144 = torch.aten.add.Tensor %2143, %2088, %int1_1449 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Feed-forward section: layer norm -> 1600->6400 affine -> tanh-based
    // activation -> 6400->1600 affine -> add back the section input %2144.
    //
    // Layer norm over the trailing 1600-wide dim with %212 / %213 as the two
    // [1600] parameter vectors and eps 9.9999997473787516E-6. Only result0 is
    // consumed in this excerpt; the two [1,7,1] results are not.
    %float9.999990e-06_1450 = torch.constant.float 9.9999997473787516E-6
    %int1600_1451 = torch.constant.int 1600
    %2145 = torch.prim.ListConstruct %int1600_1451 : (!torch.int) -> !torch.list<int>
    %result0_1452, %result1_1453, %result2_1454 = torch.aten.native_layer_norm %2144, %2145, %212, %213, %float9.999990e-06_1450 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): the shape literals (%2146, %2151, %2167, %2172) and the
    // %int0_* constants in this section have no use inside this excerpt.
    %2146 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1455 = torch.constant.int 7
    %int1600_1456 = torch.constant.int 1600
    %2147 = torch.prim.ListConstruct %int7_1455, %int1600_1456 : (!torch.int, !torch.int) -> !torch.list<int>
    %2148 = torch.aten.reshape %result0_1452, %2147 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1457 = torch.constant.int 0
    %int1_1458 = torch.constant.int 1
    // Expand 1600 -> 6400: mm with %214 [1600,6400] plus the [6400] vector %215.
    %2149 = torch.aten.mm %2148, %214 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2150 = torch.aten.add.Tensor %2149, %215, %int1_1458 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2151 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1459 = torch.constant.int 1
    %int7_1460 = torch.constant.int 7
    %int6400_1461 = torch.constant.int 6400
    %2152 = torch.prim.ListConstruct %int1_1459, %int7_1460, %int6400_1461 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2153 = torch.aten.reshape %2150, %2152 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Activation, with x = %2153, spelled out op by op:
    //   %2166 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the arithmetic of the tanh-approximation GELU.
    %2154 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2155 = torch.aten.mul.Tensor %2153, %2154 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2156 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2157 = torch.aten.pow.Tensor_Tensor %2153, %2156 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2158 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2159 = torch.aten.mul.Tensor %2157, %2158 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1462 = torch.constant.int 1
    %2160 = torch.aten.add.Tensor %2153, %2159, %int1_1462 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2161 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2162 = torch.aten.mul.Tensor %2160, %2161 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2163 = torch.aten.tanh %2162 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2164 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1463 = torch.constant.int 1
    %2165 = torch.aten.add.Tensor %2163, %2164, %int1_1463 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2166 = torch.aten.mul.Tensor %2155, %2165 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Contract 6400 -> 1600: flatten to [7,6400], mm with %216 [6400,1600], add
    // the [1600] vector %217, then restore the [1,7,1600] shape.
    %2167 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1464 = torch.constant.int 7
    %int6400_1465 = torch.constant.int 6400
    %2168 = torch.prim.ListConstruct %int7_1464, %int6400_1465 : (!torch.int, !torch.int) -> !torch.list<int>
    %2169 = torch.aten.reshape %2166, %2168 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1466 = torch.constant.int 0
    %int1_1467 = torch.constant.int 1
    %2170 = torch.aten.mm %2169, %216 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2171 = torch.aten.add.Tensor %2170, %217, %int1_1467 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2172 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1468 = torch.constant.int 1
    %int7_1469 = torch.constant.int 7
    %int1600_1470 = torch.constant.int 1600
    %2173 = torch.prim.ListConstruct %int1_1468, %int7_1469, %int1600_1470 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2174 = torch.aten.reshape %2171, %2173 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1471 = torch.constant.int 1
    // Skip connection: the pre-norm input %2144 plus the feed-forward output.
    %2175 = torch.aten.add.Tensor %2144, %2174, %int1_1471 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Attention section: layer norm -> fused 1600->4800 affine -> split into three
    // 1600-wide slices -> 25 heads of width 64 -> scaled, masked softmax
    // attention -> merge heads -> 1600->1600 affine -> add back the input %2175.
    //
    // Layer norm over the trailing 1600-wide dim with %218 / %219 as the two
    // [1600] parameter vectors. Only result0 is consumed in this excerpt.
    %float9.999990e-06_1472 = torch.constant.float 9.9999997473787516E-6
    %int1600_1473 = torch.constant.int 1600
    %2176 = torch.prim.ListConstruct %int1600_1473 : (!torch.int) -> !torch.list<int>
    %result0_1474, %result1_1475, %result2_1476 = torch.aten.native_layer_norm %2175, %2176, %218, %219, %float9.999990e-06_1472 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): the shape literals (%2177, %2182, %2189, %2193, %2197, %2207,
    // %2220, %2223, %2228) and the %int0_* constants in this section have no use
    // inside this excerpt.
    %2177 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1477 = torch.constant.int 7
    %int1600_1478 = torch.constant.int 1600
    %2178 = torch.prim.ListConstruct %int7_1477, %int1600_1478 : (!torch.int, !torch.int) -> !torch.list<int>
    %2179 = torch.aten.reshape %result0_1474, %2178 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1479 = torch.constant.int 0
    %int1_1480 = torch.constant.int 1
    // Fused projection: mm with %220 [1600,4800] plus the [4800] vector %221.
    %2180 = torch.aten.mm %2179, %220 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2181 = torch.aten.add.Tensor %2180, %221, %int1_1480 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2182 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1481 = torch.constant.int 1
    %int7_1482 = torch.constant.int 7
    %int4800_1483 = torch.constant.int 4800
    %2183 = torch.prim.ListConstruct %int1_1481, %int7_1482, %int4800_1483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2184 = torch.aten.reshape %2181, %2183 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split dim 2 into three pieces; the sizes come from the splat literal
    // dense<1600> : tensor<3xsi64>, i.e. three entries of 1600.
    %2185 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2186 = torch.prim.tolist(%2185) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1484 = torch.constant.int 2
    %2187 = torch.aten.split_with_sizes %2184, %2186, %int2_1484 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2188:3 = torch.prim.ListUnpack %2187 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Slice #0 -> [1,7,25,64] -> [1,25,7,64]. %2192 is the left operand of the
    // score matmul below (query role — an interpretation from its use).
    %2189 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1485 = torch.constant.int 1
    %int7_1486 = torch.constant.int 7
    %int25_1487 = torch.constant.int 25
    %int64_1488 = torch.constant.int 64
    %2190 = torch.prim.ListConstruct %int1_1485, %int7_1486, %int25_1487, %int64_1488 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2191 = torch.aten.reshape %2188#0, %2190 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1489 = torch.constant.int 1
    %int2_1490 = torch.constant.int 2
    %2192 = torch.aten.transpose.int %2191, %int1_1489, %int2_1490 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> [1,7,25,64] -> [1,25,7,64].
    // NOTE(review): %2196 is not consumed inside this excerpt; presumably it is
    // one of the function's [1,25,7,64] results — confirm at the return.
    %2193 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1491 = torch.constant.int 1
    %int7_1492 = torch.constant.int 7
    %int25_1493 = torch.constant.int 25
    %int64_1494 = torch.constant.int 64
    %2194 = torch.prim.ListConstruct %int1_1491, %int7_1492, %int25_1493, %int64_1494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2195 = torch.aten.reshape %2188#1, %2194 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1495 = torch.constant.int 1
    %int2_1496 = torch.constant.int 2
    %2196 = torch.aten.transpose.int %2195, %int1_1495, %int2_1496 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> [1,7,25,64] -> [1,25,7,64]. %2200 is the right operand of the
    // second matmul below (value role — an interpretation from its use).
    %2197 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1497 = torch.constant.int 1
    %int7_1498 = torch.constant.int 7
    %int25_1499 = torch.constant.int 25
    %int64_1500 = torch.constant.int 64
    %2198 = torch.prim.ListConstruct %int1_1497, %int7_1498, %int25_1499, %int64_1500 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2199 = torch.aten.reshape %2188#2, %2198 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1501 = torch.constant.int 1
    %int2_1502 = torch.constant.int 2
    %2200 = torch.aten.transpose.int %2199, %int1_1501, %int2_1502 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Repeat the (1,2) transpose of %2195 (same operands as %2196), then swap the
    // last two dims to get [1,25,64,7] for the score matmul.
    %int1_1503 = torch.constant.int 1
    %int2_1504 = torch.constant.int 2
    %2201 = torch.aten.transpose.int %2195, %int1_1503, %int2_1504 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1505 = torch.constant.int 2
    %int3_1506 = torch.constant.int 3
    %2202 = torch.aten.transpose.int %2201, %int2_1505, %int3_1506 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %2203 = torch.aten.matmul %2192, %2202 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor built at run time: %2206 = 64 ^ 0.5; %2209 is a rank-0 tensor
    // filled with 0.0; %2210 = %2209 + %2206. The scores are divided by %2210.
    %2204 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2205 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2206 = torch.aten.pow.Tensor_Tensor %2204, %2205 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2207 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1507 = torch.constant.int 0
    %2208 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1508 = torch.constant.none
    %float0.000000e00_1509 = torch.constant.float 0.000000e+00
    %2209 = torch.aten.full %2208, %float0.000000e00_1509, %none_1508, %none_1508, %none_1508, %none_1508 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1510 = torch.constant.int 1
    %2210 = torch.aten.add.Tensor %2209, %2206, %int1_1510 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2211 = torch.aten.div.Tensor %2203, %2210 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask — cannot be
    // confirmed from the elided resource.
    %2212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result types are both f32 here.
    %int6_1511 = torch.constant.int 6
    %none_1512 = torch.constant.none
    %false_1513 = torch.constant.bool false
    %2213 = torch.aten.to.dtype %2211, %int6_1511, %false_1513, %false_1513, %none_1512 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise substitute
    // -3.40282347E+38, so masked positions get negligible softmax weight.
    %2214 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2215 = torch.aten.where.self %2212, %2213, %2214 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last dim of the [1,25,7,7] scores).
    %int3_1514 = torch.constant.int 3
    %none_1515 = torch.constant.none
    %2216 = torch.aten.softmax.int %2215, %int3_1514, %none_1515 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_1516 = torch.constant.int 6
    %none_1517 = torch.constant.none
    %false_1518 = torch.constant.bool false
    %2217 = torch.aten.to.dtype %2216, %int6_1516, %false_1518, %false_1518, %none_1517 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %2218 = torch.aten.matmul %2217, %2200 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge heads: transpose to [1,7,25,64], reshape to [1,7,1600], then flatten
    // to [7,1600] for the 2-D projection.
    %int1_1519 = torch.constant.int 1
    %int2_1520 = torch.constant.int 2
    %2219 = torch.aten.transpose.int %2218, %int1_1519, %int2_1520 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2220 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1521 = torch.constant.int 1
    %int7_1522 = torch.constant.int 7
    %int1600_1523 = torch.constant.int 1600
    %2221 = torch.prim.ListConstruct %int1_1521, %int7_1522, %int1600_1523 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2222 = torch.aten.reshape %2219, %2221 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2223 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1524 = torch.constant.int 7
    %int1600_1525 = torch.constant.int 1600
    %2224 = torch.prim.ListConstruct %int7_1524, %int1600_1525 : (!torch.int, !torch.int) -> !torch.list<int>
    %2225 = torch.aten.reshape %2222, %2224 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1526 = torch.constant.int 0
    %int1_1527 = torch.constant.int 1
    // Output projection: mm with %222 [1600,1600] plus the [1600] vector %223.
    %2226 = torch.aten.mm %2225, %222 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2227 = torch.aten.add.Tensor %2226, %223, %int1_1527 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2228 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1528 = torch.constant.int 1
    %int7_1529 = torch.constant.int 7
    %int1600_1530 = torch.constant.int 1600
    %2229 = torch.prim.ListConstruct %int1_1528, %int7_1529, %int1600_1530 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2230 = torch.aten.reshape %2227, %2229 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1531 = torch.constant.int 1
    // Skip connection: attention output plus the pre-norm input %2175.
    %2231 = torch.aten.add.Tensor %2230, %2175, %int1_1531 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // First half of a feed-forward section: layer norm -> 1600->6400 affine ->
    // tanh-based activation -> flatten to [7,6400]. The consumer of %2256 lies
    // beyond the end of this excerpt.
    //
    // Layer norm over the trailing 1600-wide dim with %224 / %225 as the two
    // [1600] parameter vectors and eps 9.9999997473787516E-6. Only result0 is
    // consumed in this excerpt; the two [1,7,1] results are not.
    %float9.999990e-06_1532 = torch.constant.float 9.9999997473787516E-6
    %int1600_1533 = torch.constant.int 1600
    %2232 = torch.prim.ListConstruct %int1600_1533 : (!torch.int) -> !torch.list<int>
    %result0_1534, %result1_1535, %result2_1536 = torch.aten.native_layer_norm %2231, %2232, %224, %225, %float9.999990e-06_1532 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): the shape literals (%2233, %2238, %2254) and %int0_1539 have
    // no use inside this excerpt.
    %2233 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1537 = torch.constant.int 7
    %int1600_1538 = torch.constant.int 1600
    %2234 = torch.prim.ListConstruct %int7_1537, %int1600_1538 : (!torch.int, !torch.int) -> !torch.list<int>
    %2235 = torch.aten.reshape %result0_1534, %2234 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1539 = torch.constant.int 0
    %int1_1540 = torch.constant.int 1
    // Expand 1600 -> 6400: mm with %226 [1600,6400] plus the [6400] vector %227.
    %2236 = torch.aten.mm %2235, %226 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2237 = torch.aten.add.Tensor %2236, %227, %int1_1540 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2238 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1541 = torch.constant.int 1
    %int7_1542 = torch.constant.int 7
    %int6400_1543 = torch.constant.int 6400
    %2239 = torch.prim.ListConstruct %int1_1541, %int7_1542, %int6400_1543 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2240 = torch.aten.reshape %2237, %2239 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Activation, with x = %2240, spelled out op by op:
    //   %2253 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the arithmetic of the tanh-approximation GELU.
    %2241 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2242 = torch.aten.mul.Tensor %2240, %2241 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2243 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2244 = torch.aten.pow.Tensor_Tensor %2240, %2243 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2245 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2246 = torch.aten.mul.Tensor %2244, %2245 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1544 = torch.constant.int 1
    %2247 = torch.aten.add.Tensor %2240, %2246, %int1_1544 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2248 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2249 = torch.aten.mul.Tensor %2247, %2248 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2250 = torch.aten.tanh %2249 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2251 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1545 = torch.constant.int 1
    %2252 = torch.aten.add.Tensor %2250, %2251, %int1_1545 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2253 = torch.aten.mul.Tensor %2242, %2252 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Flatten [1,7,6400] -> [7,6400].
    %2254 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1546 = torch.constant.int 7
    %int6400_1547 = torch.constant.int 6400
    %2255 = torch.prim.ListConstruct %int7_1546, %int6400_1547 : (!torch.int, !torch.int) -> !torch.list<int>
    %2256 = torch.aten.reshape %2253, %2255 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1548 = torch.constant.int 0
    %int1_1549 = torch.constant.int 1
    %2257 = torch.aten.mm %2256, %228 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2258 = torch.aten.add.Tensor %2257, %229, %int1_1549 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2259 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1550 = torch.constant.int 1
    %int7_1551 = torch.constant.int 7
    %int1600_1552 = torch.constant.int 1600
    %2260 = torch.prim.ListConstruct %int1_1550, %int7_1551, %int1600_1552 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2261 = torch.aten.reshape %2258, %2260 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1553 = torch.constant.int 1
    %2262 = torch.aten.add.Tensor %2231, %2261, %int1_1553 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_1554 = torch.constant.float 9.9999997473787516E-6
    %int1600_1555 = torch.constant.int 1600
    %2263 = torch.prim.ListConstruct %int1600_1555 : (!torch.int) -> !torch.list<int>
    %result0_1556, %result1_1557, %result2_1558 = torch.aten.native_layer_norm %2262, %2263, %230, %231, %float9.999990e-06_1554 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2264 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1559 = torch.constant.int 7
    %int1600_1560 = torch.constant.int 1600
    %2265 = torch.prim.ListConstruct %int7_1559, %int1600_1560 : (!torch.int, !torch.int) -> !torch.list<int>
    %2266 = torch.aten.reshape %result0_1556, %2265 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1561 = torch.constant.int 0
    %int1_1562 = torch.constant.int 1
    %2267 = torch.aten.mm %2266, %232 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2268 = torch.aten.add.Tensor %2267, %233, %int1_1562 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2269 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1563 = torch.constant.int 1
    %int7_1564 = torch.constant.int 7
    %int4800_1565 = torch.constant.int 4800
    %2270 = torch.prim.ListConstruct %int1_1563, %int7_1564, %int4800_1565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2271 = torch.aten.reshape %2268, %2270 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %2272 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2273 = torch.prim.tolist(%2272) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1566 = torch.constant.int 2
    %2274 = torch.aten.split_with_sizes %2271, %2273, %int2_1566 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2275:3 = torch.prim.ListUnpack %2274 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %2276 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1567 = torch.constant.int 1
    %int7_1568 = torch.constant.int 7
    %int25_1569 = torch.constant.int 25
    %int64_1570 = torch.constant.int 64
    %2277 = torch.prim.ListConstruct %int1_1567, %int7_1568, %int25_1569, %int64_1570 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2278 = torch.aten.reshape %2275#0, %2277 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1571 = torch.constant.int 1
    %int2_1572 = torch.constant.int 2
    %2279 = torch.aten.transpose.int %2278, %int1_1571, %int2_1572 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2280 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1573 = torch.constant.int 1
    %int7_1574 = torch.constant.int 7
    %int25_1575 = torch.constant.int 25
    %int64_1576 = torch.constant.int 64
    %2281 = torch.prim.ListConstruct %int1_1573, %int7_1574, %int25_1575, %int64_1576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2282 = torch.aten.reshape %2275#1, %2281 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1577 = torch.constant.int 1
    %int2_1578 = torch.constant.int 2
    %2283 = torch.aten.transpose.int %2282, %int1_1577, %int2_1578 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2284 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1579 = torch.constant.int 1
    %int7_1580 = torch.constant.int 7
    %int25_1581 = torch.constant.int 25
    %int64_1582 = torch.constant.int 64
    %2285 = torch.prim.ListConstruct %int1_1579, %int7_1580, %int25_1581, %int64_1582 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2286 = torch.aten.reshape %2275#2, %2285 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1583 = torch.constant.int 1
    %int2_1584 = torch.constant.int 2
    %2287 = torch.aten.transpose.int %2286, %int1_1583, %int2_1584 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1585 = torch.constant.int 1
    %int2_1586 = torch.constant.int 2
    %2288 = torch.aten.transpose.int %2282, %int1_1585, %int2_1586 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1587 = torch.constant.int 2
    %int3_1588 = torch.constant.int 3
    %2289 = torch.aten.transpose.int %2288, %int2_1587, %int3_1588 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2290 = torch.aten.matmul %2279, %2289 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2291 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2292 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2293 = torch.aten.pow.Tensor_Tensor %2291, %2292 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2294 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1589 = torch.constant.int 0
    %2295 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1590 = torch.constant.none
    %float0.000000e00_1591 = torch.constant.float 0.000000e+00
    %2296 = torch.aten.full %2295, %float0.000000e00_1591, %none_1590, %none_1590, %none_1590, %none_1590 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1592 = torch.constant.int 1
    %2297 = torch.aten.add.Tensor %2296, %2293, %int1_1592 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2298 = torch.aten.div.Tensor %2290, %2297 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_1593 = torch.constant.int 6
    %none_1594 = torch.constant.none
    %false_1595 = torch.constant.bool false
    %2300 = torch.aten.to.dtype %2298, %int6_1593, %false_1595, %false_1595, %none_1594 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2301 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2302 = torch.aten.where.self %2299, %2300, %2301 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_1596 = torch.constant.int 3
    %none_1597 = torch.constant.none
    %2303 = torch.aten.softmax.int %2302, %int3_1596, %none_1597 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_1598 = torch.constant.int 6
    %none_1599 = torch.constant.none
    %false_1600 = torch.constant.bool false
    %2304 = torch.aten.to.dtype %2303, %int6_1598, %false_1600, %false_1600, %none_1599 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2305 = torch.aten.matmul %2304, %2287 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1601 = torch.constant.int 1
    %int2_1602 = torch.constant.int 2
    %2306 = torch.aten.transpose.int %2305, %int1_1601, %int2_1602 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2307 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1603 = torch.constant.int 1
    %int7_1604 = torch.constant.int 7
    %int1600_1605 = torch.constant.int 1600
    %2308 = torch.prim.ListConstruct %int1_1603, %int7_1604, %int1600_1605 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2309 = torch.aten.reshape %2306, %2308 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2310 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1606 = torch.constant.int 7
    %int1600_1607 = torch.constant.int 1600
    %2311 = torch.prim.ListConstruct %int7_1606, %int1600_1607 : (!torch.int, !torch.int) -> !torch.list<int>
    %2312 = torch.aten.reshape %2309, %2311 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1608 = torch.constant.int 0
    %int1_1609 = torch.constant.int 1
    %2313 = torch.aten.mm %2312, %234 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2314 = torch.aten.add.Tensor %2313, %235, %int1_1609 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2315 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1610 = torch.constant.int 1
    %int7_1611 = torch.constant.int 7
    %int1600_1612 = torch.constant.int 1600
    %2316 = torch.prim.ListConstruct %int1_1610, %int7_1611, %int1600_1612 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2317 = torch.aten.reshape %2314, %2316 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1613 = torch.constant.int 1
    %2318 = torch.aten.add.Tensor %2317, %2262, %int1_1613 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_1614 = torch.constant.float 9.9999997473787516E-6
    %int1600_1615 = torch.constant.int 1600
    %2319 = torch.prim.ListConstruct %int1600_1615 : (!torch.int) -> !torch.list<int>
    %result0_1616, %result1_1617, %result2_1618 = torch.aten.native_layer_norm %2318, %2319, %236, %237, %float9.999990e-06_1614 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2320 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1619 = torch.constant.int 7
    %int1600_1620 = torch.constant.int 1600
    %2321 = torch.prim.ListConstruct %int7_1619, %int1600_1620 : (!torch.int, !torch.int) -> !torch.list<int>
    %2322 = torch.aten.reshape %result0_1616, %2321 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1621 = torch.constant.int 0
    %int1_1622 = torch.constant.int 1
    %2323 = torch.aten.mm %2322, %238 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2324 = torch.aten.add.Tensor %2323, %239, %int1_1622 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2325 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1623 = torch.constant.int 1
    %int7_1624 = torch.constant.int 7
    %int6400_1625 = torch.constant.int 6400
    %2326 = torch.prim.ListConstruct %int1_1623, %int7_1624, %int6400_1625 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2327 = torch.aten.reshape %2324, %2326 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %2328 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2329 = torch.aten.mul.Tensor %2327, %2328 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2330 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2331 = torch.aten.pow.Tensor_Tensor %2327, %2330 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2332 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2333 = torch.aten.mul.Tensor %2331, %2332 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1626 = torch.constant.int 1
    %2334 = torch.aten.add.Tensor %2327, %2333, %int1_1626 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2335 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2336 = torch.aten.mul.Tensor %2334, %2335 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2337 = torch.aten.tanh %2336 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2338 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1627 = torch.constant.int 1
    %2339 = torch.aten.add.Tensor %2337, %2338, %int1_1627 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2340 = torch.aten.mul.Tensor %2329, %2339 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2341 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1628 = torch.constant.int 7
    %int6400_1629 = torch.constant.int 6400
    %2342 = torch.prim.ListConstruct %int7_1628, %int6400_1629 : (!torch.int, !torch.int) -> !torch.list<int>
    %2343 = torch.aten.reshape %2340, %2342 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1630 = torch.constant.int 0
    %int1_1631 = torch.constant.int 1
    %2344 = torch.aten.mm %2343, %240 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2345 = torch.aten.add.Tensor %2344, %241, %int1_1631 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2346 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1632 = torch.constant.int 1
    %int7_1633 = torch.constant.int 7
    %int1600_1634 = torch.constant.int 1600
    %2347 = torch.prim.ListConstruct %int1_1632, %int7_1633, %int1600_1634 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2348 = torch.aten.reshape %2345, %2347 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1635 = torch.constant.int 1
    %2349 = torch.aten.add.Tensor %2318, %2348, %int1_1635 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_1636 = torch.constant.float 9.9999997473787516E-6
    %int1600_1637 = torch.constant.int 1600
    %2350 = torch.prim.ListConstruct %int1600_1637 : (!torch.int) -> !torch.list<int>
    %result0_1638, %result1_1639, %result2_1640 = torch.aten.native_layer_norm %2349, %2350, %242, %243, %float9.999990e-06_1636 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2351 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1641 = torch.constant.int 7
    %int1600_1642 = torch.constant.int 1600
    %2352 = torch.prim.ListConstruct %int7_1641, %int1600_1642 : (!torch.int, !torch.int) -> !torch.list<int>
    %2353 = torch.aten.reshape %result0_1638, %2352 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1643 = torch.constant.int 0
    %int1_1644 = torch.constant.int 1
    %2354 = torch.aten.mm %2353, %244 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2355 = torch.aten.add.Tensor %2354, %245, %int1_1644 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2356 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1645 = torch.constant.int 1
    %int7_1646 = torch.constant.int 7
    %int4800_1647 = torch.constant.int 4800
    %2357 = torch.prim.ListConstruct %int1_1645, %int7_1646, %int4800_1647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2358 = torch.aten.reshape %2355, %2357 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %2359 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2360 = torch.prim.tolist(%2359) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1648 = torch.constant.int 2
    %2361 = torch.aten.split_with_sizes %2358, %2360, %int2_1648 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2362:3 = torch.prim.ListUnpack %2361 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %2363 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1649 = torch.constant.int 1
    %int7_1650 = torch.constant.int 7
    %int25_1651 = torch.constant.int 25
    %int64_1652 = torch.constant.int 64
    %2364 = torch.prim.ListConstruct %int1_1649, %int7_1650, %int25_1651, %int64_1652 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2365 = torch.aten.reshape %2362#0, %2364 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1653 = torch.constant.int 1
    %int2_1654 = torch.constant.int 2
    %2366 = torch.aten.transpose.int %2365, %int1_1653, %int2_1654 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2367 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1655 = torch.constant.int 1
    %int7_1656 = torch.constant.int 7
    %int25_1657 = torch.constant.int 25
    %int64_1658 = torch.constant.int 64
    %2368 = torch.prim.ListConstruct %int1_1655, %int7_1656, %int25_1657, %int64_1658 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2369 = torch.aten.reshape %2362#1, %2368 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1659 = torch.constant.int 1
    %int2_1660 = torch.constant.int 2
    %2370 = torch.aten.transpose.int %2369, %int1_1659, %int2_1660 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2371 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1661 = torch.constant.int 1
    %int7_1662 = torch.constant.int 7
    %int25_1663 = torch.constant.int 25
    %int64_1664 = torch.constant.int 64
    %2372 = torch.prim.ListConstruct %int1_1661, %int7_1662, %int25_1663, %int64_1664 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2373 = torch.aten.reshape %2362#2, %2372 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1665 = torch.constant.int 1
    %int2_1666 = torch.constant.int 2
    %2374 = torch.aten.transpose.int %2373, %int1_1665, %int2_1666 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1667 = torch.constant.int 1
    %int2_1668 = torch.constant.int 2
    %2375 = torch.aten.transpose.int %2369, %int1_1667, %int2_1668 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1669 = torch.constant.int 2
    %int3_1670 = torch.constant.int 3
    %2376 = torch.aten.transpose.int %2375, %int2_1669, %int3_1670 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2377 = torch.aten.matmul %2366, %2376 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2378 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2379 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2380 = torch.aten.pow.Tensor_Tensor %2378, %2379 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2381 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1671 = torch.constant.int 0
    %2382 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1672 = torch.constant.none
    %float0.000000e00_1673 = torch.constant.float 0.000000e+00
    %2383 = torch.aten.full %2382, %float0.000000e00_1673, %none_1672, %none_1672, %none_1672, %none_1672 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1674 = torch.constant.int 1
    %2384 = torch.aten.add.Tensor %2383, %2380, %int1_1674 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2385 = torch.aten.div.Tensor %2377, %2384 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_1675 = torch.constant.int 6
    %none_1676 = torch.constant.none
    %false_1677 = torch.constant.bool false
    %2387 = torch.aten.to.dtype %2385, %int6_1675, %false_1677, %false_1677, %none_1676 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2388 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2389 = torch.aten.where.self %2386, %2387, %2388 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_1678 = torch.constant.int 3
    %none_1679 = torch.constant.none
    %2390 = torch.aten.softmax.int %2389, %int3_1678, %none_1679 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_1680 = torch.constant.int 6
    %none_1681 = torch.constant.none
    %false_1682 = torch.constant.bool false
    %2391 = torch.aten.to.dtype %2390, %int6_1680, %false_1682, %false_1682, %none_1681 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2392 = torch.aten.matmul %2391, %2374 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1683 = torch.constant.int 1
    %int2_1684 = torch.constant.int 2
    %2393 = torch.aten.transpose.int %2392, %int1_1683, %int2_1684 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2394 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1685 = torch.constant.int 1
    %int7_1686 = torch.constant.int 7
    %int1600_1687 = torch.constant.int 1600
    %2395 = torch.prim.ListConstruct %int1_1685, %int7_1686, %int1600_1687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2396 = torch.aten.reshape %2393, %2395 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2397 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1688 = torch.constant.int 7
    %int1600_1689 = torch.constant.int 1600
    %2398 = torch.prim.ListConstruct %int7_1688, %int1600_1689 : (!torch.int, !torch.int) -> !torch.list<int>
    %2399 = torch.aten.reshape %2396, %2398 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1690 = torch.constant.int 0
    %int1_1691 = torch.constant.int 1
    %2400 = torch.aten.mm %2399, %246 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2401 = torch.aten.add.Tensor %2400, %247, %int1_1691 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2402 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1692 = torch.constant.int 1
    %int7_1693 = torch.constant.int 7
    %int1600_1694 = torch.constant.int 1600
    %2403 = torch.prim.ListConstruct %int1_1692, %int7_1693, %int1600_1694 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2404 = torch.aten.reshape %2401, %2403 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1695 = torch.constant.int 1
    %2405 = torch.aten.add.Tensor %2404, %2349, %int1_1695 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_1696 = torch.constant.float 9.9999997473787516E-6
    %int1600_1697 = torch.constant.int 1600
    %2406 = torch.prim.ListConstruct %int1600_1697 : (!torch.int) -> !torch.list<int>
    %result0_1698, %result1_1699, %result2_1700 = torch.aten.native_layer_norm %2405, %2406, %248, %249, %float9.999990e-06_1696 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2407 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1701 = torch.constant.int 7
    %int1600_1702 = torch.constant.int 1600
    %2408 = torch.prim.ListConstruct %int7_1701, %int1600_1702 : (!torch.int, !torch.int) -> !torch.list<int>
    %2409 = torch.aten.reshape %result0_1698, %2408 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1703 = torch.constant.int 0
    %int1_1704 = torch.constant.int 1
    %2410 = torch.aten.mm %2409, %250 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2411 = torch.aten.add.Tensor %2410, %251, %int1_1704 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2412 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1705 = torch.constant.int 1
    %int7_1706 = torch.constant.int 7
    %int6400_1707 = torch.constant.int 6400
    %2413 = torch.prim.ListConstruct %int1_1705, %int7_1706, %int6400_1707 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2414 = torch.aten.reshape %2411, %2413 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %2415 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2416 = torch.aten.mul.Tensor %2414, %2415 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2417 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2418 = torch.aten.pow.Tensor_Tensor %2414, %2417 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2419 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2420 = torch.aten.mul.Tensor %2418, %2419 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1708 = torch.constant.int 1
    %2421 = torch.aten.add.Tensor %2414, %2420, %int1_1708 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2422 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2423 = torch.aten.mul.Tensor %2421, %2422 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2424 = torch.aten.tanh %2423 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2425 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1709 = torch.constant.int 1
    %2426 = torch.aten.add.Tensor %2424, %2425, %int1_1709 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2427 = torch.aten.mul.Tensor %2416, %2426 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 followed by a residual add.
    // %2427 is flattened to [7,6400], multiplied by %252 ([6400,1600]), biased with
    // %253 ([1600]), reshaped to [1,7,1600] and added to %2405.
    // %252, %253 and %2405 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2428 / %2433 and %int0_1712 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2429 / %2434.
    %2428 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1710 = torch.constant.int 7
    %int6400_1711 = torch.constant.int 6400
    %2429 = torch.prim.ListConstruct %int7_1710, %int6400_1711 : (!torch.int, !torch.int) -> !torch.list<int>
    %2430 = torch.aten.reshape %2427, %2429 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1712 = torch.constant.int 0
    %int1_1713 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1 (bias broadcasts over the 7 rows).
    %2431 = torch.aten.mm %2430, %252 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2432 = torch.aten.add.Tensor %2431, %253, %int1_1713 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2433 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1714 = torch.constant.int 1
    %int7_1715 = torch.constant.int 7
    %int1600_1716 = torch.constant.int 1600
    %2434 = torch.prim.ListConstruct %int1_1714, %int7_1715, %int1600_1716 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2435 = torch.aten.reshape %2432, %2434 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: %2436 = %2405 + 1 * %2435
    %int1_1717 = torch.constant.int 1
    %2436 = torch.aten.add.Tensor %2405, %2435, %int1_1717 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %2436 with normalized shape [1600] (the last dimension),
    // weight %254, bias %255 and eps 9.9999997473787516E-6 (about 1e-5).
    // %254 / %255 are defined earlier in the function, outside this chunk.
    // Only %result0_1720 is consumed in the visible lines; the two [1,7,1] results
    // (%result1_1721, %result2_1722) are not referenced here.
    %float9.999990e-06_1718 = torch.constant.float 9.9999997473787516E-6
    %int1600_1719 = torch.constant.int 1600
    %2437 = torch.prim.ListConstruct %int1600_1719 : (!torch.int) -> !torch.list<int>
    %result0_1720, %result1_1721, %result2_1722 = torch.aten.native_layer_norm %2436, %2437, %254, %255, %float9.999990e-06_1718 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 4800 on the normalized activations.
    // %result0_1720 is flattened to [7,1600], multiplied by %256 ([1600,4800]), biased
    // with %257 ([4800]) and reshaped to [1,7,4800] (%2445).
    // %256 / %257 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2438 / %2443 and %int0_1725 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2439 / %2444.
    %2438 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1723 = torch.constant.int 7
    %int1600_1724 = torch.constant.int 1600
    %2439 = torch.prim.ListConstruct %int7_1723, %int1600_1724 : (!torch.int, !torch.int) -> !torch.list<int>
    %2440 = torch.aten.reshape %result0_1720, %2439 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1725 = torch.constant.int 0
    %int1_1726 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2441 = torch.aten.mm %2440, %256 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2442 = torch.aten.add.Tensor %2441, %257, %int1_1726 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2443 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1727 = torch.constant.int 1
    %int7_1728 = torch.constant.int 7
    %int4800_1729 = torch.constant.int 4800
    %2444 = torch.prim.ListConstruct %int1_1727, %int7_1728, %int4800_1729 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2445 = torch.aten.reshape %2442, %2444 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split %2445 ([1,7,4800]) along dim 2 into three [1,7,1600] chunks.
    // The sizes come from a splat literal (three entries, each 1600) converted to an
    // int list; the resulting tensor list is unpacked into %2449#0, %2449#1, %2449#2.
    %2446 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2447 = torch.prim.tolist(%2446) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1730 = torch.constant.int 2
    %2448 = torch.aten.split_with_sizes %2445, %2447, %int2_1730 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2449:3 = torch.prim.ListUnpack %2448 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Per-head layout for the three split chunks: each [1,7,1600] tensor is reshaped
    // to [1,7,25,64] and dims 1 and 2 are swapped, giving [1,25,7,64].
    //   %2449#0 -> %2452 -> %2453
    //   %2449#1 -> %2456 -> %2457
    //   %2449#2 -> %2460 -> %2461
    // In the visible lines %2453 is the left operand of the score matmul, %2456 is
    // re-transposed for its right operand, and %2461 is multiplied by the softmax
    // output; i.e. they play the query / key / value roles respectively.
    // NOTE(review): the shape literals %2450 / %2454 / %2458 are not referenced in the
    // visible lines; the reshapes take the explicit int lists instead.
    %2450 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1731 = torch.constant.int 1
    %int7_1732 = torch.constant.int 7
    %int25_1733 = torch.constant.int 25
    %int64_1734 = torch.constant.int 64
    %2451 = torch.prim.ListConstruct %int1_1731, %int7_1732, %int25_1733, %int64_1734 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2452 = torch.aten.reshape %2449#0, %2451 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1735 = torch.constant.int 1
    %int2_1736 = torch.constant.int 2
    %2453 = torch.aten.transpose.int %2452, %int1_1735, %int2_1736 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second chunk.
    %2454 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1737 = torch.constant.int 1
    %int7_1738 = torch.constant.int 7
    %int25_1739 = torch.constant.int 25
    %int64_1740 = torch.constant.int 64
    %2455 = torch.prim.ListConstruct %int1_1737, %int7_1738, %int25_1739, %int64_1740 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2456 = torch.aten.reshape %2449#1, %2455 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1741 = torch.constant.int 1
    %int2_1742 = torch.constant.int 2
    %2457 = torch.aten.transpose.int %2456, %int1_1741, %int2_1742 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third chunk.
    %2458 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1743 = torch.constant.int 1
    %int7_1744 = torch.constant.int 7
    %int25_1745 = torch.constant.int 25
    %int64_1746 = torch.constant.int 64
    %2459 = torch.prim.ListConstruct %int1_1743, %int7_1744, %int25_1745, %int64_1746 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2460 = torch.aten.reshape %2449#2, %2459 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1747 = torch.constant.int 1
    %int2_1748 = torch.constant.int 2
    %2461 = torch.aten.transpose.int %2460, %int1_1747, %int2_1748 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled score matrix: %2472 = (%2453 x transpose(%2462, 2, 3)) / %2471, shape [1,25,7,7].
    // NOTE(review): %2462 applies transpose(1, 2) to %2456, the same operand and dims
    // as the op that produces %2457; it looks like a duplicate left by the exporter.
    %int1_1749 = torch.constant.int 1
    %int2_1750 = torch.constant.int 2
    %2462 = torch.aten.transpose.int %2456, %int1_1749, %int2_1750 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the size-64 dimension.
    %int2_1751 = torch.constant.int 2
    %int3_1752 = torch.constant.int 3
    %2463 = torch.aten.transpose.int %2462, %int2_1751, %int3_1752 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2464 = torch.aten.matmul %2453, %2463 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: %2467 = 64 ^ 0.5 as a 0-d tensor.
    %2465 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2466 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2467 = torch.aten.pow.Tensor_Tensor %2465, %2466 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %2470 is a 0-d tensor filled with 0.0 (empty size list); adding %2467 to it
    // yields the same value, so %2471 equals 64 ^ 0.5.
    // NOTE(review): %2468 and %int0_1753 are not referenced in the visible lines.
    %2468 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1753 = torch.constant.int 0
    %2469 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1754 = torch.constant.none
    %float0.000000e00_1755 = torch.constant.float 0.000000e+00
    %2470 = torch.aten.full %2469, %float0.000000e00_1755, %none_1754, %none_1754, %none_1754, %none_1754 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1756 = torch.constant.int 1
    %2471 = torch.aten.add.Tensor %2470, %2467, %int1_1756 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2472 = torch.aten.div.Tensor %2464, %2471 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores, apply softmax over the last dim, and multiply by %2461.
    // %2473 is a [1,1,7,7] boolean literal whose contents are elided in this dump
    // (presumably the causal mask; confirm against the unelided model).
    %2473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 per the type signature.
    %int6_1757 = torch.constant.int 6
    %none_1758 = torch.constant.none
    %false_1759 = torch.constant.bool false
    %2474 = torch.aten.to.dtype %2472, %int6_1757, %false_1759, %false_1759, %none_1758 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score; elsewhere substitute -3.40282347E+38
    // (the most negative finite f32), so those positions get ~0 weight after softmax.
    %2475 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2476 = torch.aten.where.self %2473, %2474, %2475 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 with no dtype override (none).
    %int3_1760 = torch.constant.int 3
    %none_1761 = torch.constant.none
    %2477 = torch.aten.softmax.int %2476, %int3_1760, %none_1761 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype, again f32 -> f32 per the type signature.
    %int6_1762 = torch.constant.int 6
    %none_1763 = torch.constant.none
    %false_1764 = torch.constant.bool false
    %2478 = torch.aten.to.dtype %2477, %int6_1762, %false_1764, %false_1764, %none_1763 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %2479 = torch.aten.matmul %2478, %2461 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the per-head results: swap dims 1 and 2 of %2479 ([1,25,7,64] -> [1,7,25,64])
    // and reshape to [1,7,1600].
    // NOTE(review): the shape literal %2481 is not referenced in the visible lines;
    // the reshape takes the explicit int list %2482.
    %int1_1765 = torch.constant.int 1
    %int2_1766 = torch.constant.int 2
    %2480 = torch.aten.transpose.int %2479, %int1_1765, %int2_1766 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2481 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1767 = torch.constant.int 1
    %int7_1768 = torch.constant.int 7
    %int1600_1769 = torch.constant.int 1600
    %2482 = torch.prim.ListConstruct %int1_1767, %int7_1768, %int1600_1769 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2483 = torch.aten.reshape %2480, %2482 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 followed by a residual add.
    // %2483 is flattened to [7,1600], multiplied by %258 ([1600,1600]), biased with
    // %259 ([1600]), reshaped to [1,7,1600] and added to %2436.
    // %258 / %259 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2484 / %2489 and %int0_1772 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2485 / %2490.
    %2484 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1770 = torch.constant.int 7
    %int1600_1771 = torch.constant.int 1600
    %2485 = torch.prim.ListConstruct %int7_1770, %int1600_1771 : (!torch.int, !torch.int) -> !torch.list<int>
    %2486 = torch.aten.reshape %2483, %2485 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1772 = torch.constant.int 0
    %int1_1773 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2487 = torch.aten.mm %2486, %258 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2488 = torch.aten.add.Tensor %2487, %259, %int1_1773 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2489 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1774 = torch.constant.int 1
    %int7_1775 = torch.constant.int 7
    %int1600_1776 = torch.constant.int 1600
    %2490 = torch.prim.ListConstruct %int1_1774, %int7_1775, %int1600_1776 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2491 = torch.aten.reshape %2488, %2490 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: %2492 = %2491 + 1 * %2436
    %int1_1777 = torch.constant.int 1
    %2492 = torch.aten.add.Tensor %2491, %2436, %int1_1777 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %2492 with normalized shape [1600] (the last dimension),
    // weight %260, bias %261 and eps 9.9999997473787516E-6 (about 1e-5).
    // %260 / %261 are defined earlier in the function, outside this chunk.
    // Only %result0_1780 is consumed in the visible lines; the two [1,7,1] results
    // (%result1_1781, %result2_1782) are not referenced here.
    %float9.999990e-06_1778 = torch.constant.float 9.9999997473787516E-6
    %int1600_1779 = torch.constant.int 1600
    %2493 = torch.prim.ListConstruct %int1600_1779 : (!torch.int) -> !torch.list<int>
    %result0_1780, %result1_1781, %result2_1782 = torch.aten.native_layer_norm %2492, %2493, %260, %261, %float9.999990e-06_1778 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 on the normalized activations.
    // %result0_1780 is flattened to [7,1600], multiplied by %262 ([1600,6400]), biased
    // with %263 ([6400]) and reshaped to [1,7,6400] (%2501).
    // %262 / %263 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2494 / %2499 and %int0_1785 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2495 / %2500.
    %2494 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1783 = torch.constant.int 7
    %int1600_1784 = torch.constant.int 1600
    %2495 = torch.prim.ListConstruct %int7_1783, %int1600_1784 : (!torch.int, !torch.int) -> !torch.list<int>
    %2496 = torch.aten.reshape %result0_1780, %2495 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1785 = torch.constant.int 0
    %int1_1786 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2497 = torch.aten.mm %2496, %262 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2498 = torch.aten.add.Tensor %2497, %263, %int1_1786 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2499 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1787 = torch.constant.int 1
    %int7_1788 = torch.constant.int 7
    %int6400_1789 = torch.constant.int 6400
    %2500 = torch.prim.ListConstruct %int1_1787, %int7_1788, %int6400_1789 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2501 = torch.aten.reshape %2498, %2500 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation applied to x = %2501 ([1,7,6400]). The ops below compute
    //   %2514 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which matches the tanh approximation of GELU.
    // %2503 = 0.5 * x
    %2502 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2503 = torch.aten.mul.Tensor %2501, %2502 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2505 = x^3 (tensor-tensor pow against a 0-d exponent that broadcasts)
    %2504 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2505 = torch.aten.pow.Tensor_Tensor %2501, %2504 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2507 = 0.044715 * x^3
    %2506 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2507 = torch.aten.mul.Tensor %2505, %2506 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2508 = x + 1 * %2507 (the trailing int operand of add.Tensor is the alpha multiplier)
    %int1_1790 = torch.constant.int 1
    %2508 = torch.aten.add.Tensor %2501, %2507, %int1_1790 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %2510 = 0.797884583 * %2508 (the constant is approximately sqrt(2/pi))
    %2509 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2510 = torch.aten.mul.Tensor %2508, %2509 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2511 = torch.aten.tanh %2510 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2513 = tanh(...) + 1
    %2512 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1791 = torch.constant.int 1
    %2513 = torch.aten.add.Tensor %2511, %2512, %int1_1791 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Final product of the two halves: (0.5 * x) * (1 + tanh(...))
    %2514 = torch.aten.mul.Tensor %2503, %2513 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 followed by a residual add.
    // %2514 is flattened to [7,6400], multiplied by %264 ([6400,1600]), biased with
    // %265 ([1600]), reshaped to [1,7,1600] and added to %2492.
    // %264 / %265 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2515 / %2520 and %int0_1794 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2516 / %2521.
    %2515 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1792 = torch.constant.int 7
    %int6400_1793 = torch.constant.int 6400
    %2516 = torch.prim.ListConstruct %int7_1792, %int6400_1793 : (!torch.int, !torch.int) -> !torch.list<int>
    %2517 = torch.aten.reshape %2514, %2516 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1794 = torch.constant.int 0
    %int1_1795 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2518 = torch.aten.mm %2517, %264 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2519 = torch.aten.add.Tensor %2518, %265, %int1_1795 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2520 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1796 = torch.constant.int 1
    %int7_1797 = torch.constant.int 7
    %int1600_1798 = torch.constant.int 1600
    %2521 = torch.prim.ListConstruct %int1_1796, %int7_1797, %int1600_1798 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2522 = torch.aten.reshape %2519, %2521 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: %2523 = %2492 + 1 * %2522
    %int1_1799 = torch.constant.int 1
    %2523 = torch.aten.add.Tensor %2492, %2522, %int1_1799 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %2523 with normalized shape [1600] (the last dimension),
    // weight %266, bias %267 and eps 9.9999997473787516E-6 (about 1e-5).
    // %266 / %267 are defined earlier in the function, outside this chunk.
    // Only %result0_1802 is consumed in the visible lines; the two [1,7,1] results
    // (%result1_1803, %result2_1804) are not referenced here.
    %float9.999990e-06_1800 = torch.constant.float 9.9999997473787516E-6
    %int1600_1801 = torch.constant.int 1600
    %2524 = torch.prim.ListConstruct %int1600_1801 : (!torch.int) -> !torch.list<int>
    %result0_1802, %result1_1803, %result2_1804 = torch.aten.native_layer_norm %2523, %2524, %266, %267, %float9.999990e-06_1800 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 4800 on the normalized activations.
    // %result0_1802 is flattened to [7,1600], multiplied by %268 ([1600,4800]), biased
    // with %269 ([4800]) and reshaped to [1,7,4800] (%2532).
    // %268 / %269 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2525 / %2530 and %int0_1807 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2526 / %2531.
    %2525 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1805 = torch.constant.int 7
    %int1600_1806 = torch.constant.int 1600
    %2526 = torch.prim.ListConstruct %int7_1805, %int1600_1806 : (!torch.int, !torch.int) -> !torch.list<int>
    %2527 = torch.aten.reshape %result0_1802, %2526 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1807 = torch.constant.int 0
    %int1_1808 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2528 = torch.aten.mm %2527, %268 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2529 = torch.aten.add.Tensor %2528, %269, %int1_1808 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2530 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1809 = torch.constant.int 1
    %int7_1810 = torch.constant.int 7
    %int4800_1811 = torch.constant.int 4800
    %2531 = torch.prim.ListConstruct %int1_1809, %int7_1810, %int4800_1811 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2532 = torch.aten.reshape %2529, %2531 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split %2532 ([1,7,4800]) along dim 2 into three [1,7,1600] chunks.
    // The sizes come from a splat literal (three entries, each 1600) converted to an
    // int list; the resulting tensor list is unpacked into %2536#0, %2536#1, %2536#2.
    %2533 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2534 = torch.prim.tolist(%2533) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1812 = torch.constant.int 2
    %2535 = torch.aten.split_with_sizes %2532, %2534, %int2_1812 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2536:3 = torch.prim.ListUnpack %2535 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Per-head layout for the three split chunks: each [1,7,1600] tensor is reshaped
    // to [1,7,25,64] and dims 1 and 2 are swapped, giving [1,25,7,64].
    //   %2536#0 -> %2539 -> %2540
    //   %2536#1 -> %2543 -> %2544
    //   %2536#2 -> %2547 -> %2548
    // In the visible lines %2540 is the left operand of the score matmul, %2543 is
    // re-transposed for its right operand, and %2548 is multiplied by the softmax
    // output; i.e. they play the query / key / value roles respectively.
    // NOTE(review): the shape literals %2537 / %2541 / %2545 are not referenced in the
    // visible lines; the reshapes take the explicit int lists instead.
    %2537 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1813 = torch.constant.int 1
    %int7_1814 = torch.constant.int 7
    %int25_1815 = torch.constant.int 25
    %int64_1816 = torch.constant.int 64
    %2538 = torch.prim.ListConstruct %int1_1813, %int7_1814, %int25_1815, %int64_1816 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2539 = torch.aten.reshape %2536#0, %2538 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1817 = torch.constant.int 1
    %int2_1818 = torch.constant.int 2
    %2540 = torch.aten.transpose.int %2539, %int1_1817, %int2_1818 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second chunk.
    %2541 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1819 = torch.constant.int 1
    %int7_1820 = torch.constant.int 7
    %int25_1821 = torch.constant.int 25
    %int64_1822 = torch.constant.int 64
    %2542 = torch.prim.ListConstruct %int1_1819, %int7_1820, %int25_1821, %int64_1822 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2543 = torch.aten.reshape %2536#1, %2542 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1823 = torch.constant.int 1
    %int2_1824 = torch.constant.int 2
    %2544 = torch.aten.transpose.int %2543, %int1_1823, %int2_1824 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third chunk.
    %2545 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1825 = torch.constant.int 1
    %int7_1826 = torch.constant.int 7
    %int25_1827 = torch.constant.int 25
    %int64_1828 = torch.constant.int 64
    %2546 = torch.prim.ListConstruct %int1_1825, %int7_1826, %int25_1827, %int64_1828 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2547 = torch.aten.reshape %2536#2, %2546 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1829 = torch.constant.int 1
    %int2_1830 = torch.constant.int 2
    %2548 = torch.aten.transpose.int %2547, %int1_1829, %int2_1830 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled score matrix: %2559 = (%2540 x transpose(%2549, 2, 3)) / %2558, shape [1,25,7,7].
    // NOTE(review): %2549 applies transpose(1, 2) to %2543, the same operand and dims
    // as the op that produces %2544; it looks like a duplicate left by the exporter.
    %int1_1831 = torch.constant.int 1
    %int2_1832 = torch.constant.int 2
    %2549 = torch.aten.transpose.int %2543, %int1_1831, %int2_1832 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the size-64 dimension.
    %int2_1833 = torch.constant.int 2
    %int3_1834 = torch.constant.int 3
    %2550 = torch.aten.transpose.int %2549, %int2_1833, %int3_1834 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2551 = torch.aten.matmul %2540, %2550 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: %2554 = 64 ^ 0.5 as a 0-d tensor.
    %2552 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2553 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2554 = torch.aten.pow.Tensor_Tensor %2552, %2553 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %2557 is a 0-d tensor filled with 0.0 (empty size list); adding %2554 to it
    // yields the same value, so %2558 equals 64 ^ 0.5.
    // NOTE(review): %2555 and %int0_1835 are not referenced in the visible lines.
    %2555 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1835 = torch.constant.int 0
    %2556 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1836 = torch.constant.none
    %float0.000000e00_1837 = torch.constant.float 0.000000e+00
    %2557 = torch.aten.full %2556, %float0.000000e00_1837, %none_1836, %none_1836, %none_1836, %none_1836 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1838 = torch.constant.int 1
    %2558 = torch.aten.add.Tensor %2557, %2554, %int1_1838 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2559 = torch.aten.div.Tensor %2551, %2558 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores, apply softmax over the last dim, and multiply by %2548.
    // %2560 is a [1,1,7,7] boolean literal whose contents are elided in this dump
    // (presumably the causal mask; confirm against the unelided model).
    %2560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 per the type signature.
    %int6_1839 = torch.constant.int 6
    %none_1840 = torch.constant.none
    %false_1841 = torch.constant.bool false
    %2561 = torch.aten.to.dtype %2559, %int6_1839, %false_1841, %false_1841, %none_1840 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score; elsewhere substitute -3.40282347E+38
    // (the most negative finite f32), so those positions get ~0 weight after softmax.
    %2562 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2563 = torch.aten.where.self %2560, %2561, %2562 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 with no dtype override (none).
    %int3_1842 = torch.constant.int 3
    %none_1843 = torch.constant.none
    %2564 = torch.aten.softmax.int %2563, %int3_1842, %none_1843 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype, again f32 -> f32 per the type signature.
    %int6_1844 = torch.constant.int 6
    %none_1845 = torch.constant.none
    %false_1846 = torch.constant.bool false
    %2565 = torch.aten.to.dtype %2564, %int6_1844, %false_1846, %false_1846, %none_1845 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %2566 = torch.aten.matmul %2565, %2548 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the per-head results: swap dims 1 and 2 of %2566 ([1,25,7,64] -> [1,7,25,64])
    // and reshape to [1,7,1600].
    // NOTE(review): the shape literal %2568 is not referenced in the visible lines;
    // the reshape takes the explicit int list %2569.
    %int1_1847 = torch.constant.int 1
    %int2_1848 = torch.constant.int 2
    %2567 = torch.aten.transpose.int %2566, %int1_1847, %int2_1848 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2568 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1849 = torch.constant.int 1
    %int7_1850 = torch.constant.int 7
    %int1600_1851 = torch.constant.int 1600
    %2569 = torch.prim.ListConstruct %int1_1849, %int7_1850, %int1600_1851 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2570 = torch.aten.reshape %2567, %2569 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 followed by a residual add.
    // %2570 is flattened to [7,1600], multiplied by %270 ([1600,1600]), biased with
    // %271 ([1600]), reshaped to [1,7,1600] and added to %2523.
    // %270 / %271 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2571 / %2576 and %int0_1854 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2572 / %2577.
    %2571 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1852 = torch.constant.int 7
    %int1600_1853 = torch.constant.int 1600
    %2572 = torch.prim.ListConstruct %int7_1852, %int1600_1853 : (!torch.int, !torch.int) -> !torch.list<int>
    %2573 = torch.aten.reshape %2570, %2572 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1854 = torch.constant.int 0
    %int1_1855 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2574 = torch.aten.mm %2573, %270 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2575 = torch.aten.add.Tensor %2574, %271, %int1_1855 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2576 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1856 = torch.constant.int 1
    %int7_1857 = torch.constant.int 7
    %int1600_1858 = torch.constant.int 1600
    %2577 = torch.prim.ListConstruct %int1_1856, %int7_1857, %int1600_1858 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2578 = torch.aten.reshape %2575, %2577 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: %2579 = %2578 + 1 * %2523
    %int1_1859 = torch.constant.int 1
    %2579 = torch.aten.add.Tensor %2578, %2523, %int1_1859 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %2579 with normalized shape [1600] (the last dimension),
    // weight %272, bias %273 and eps 9.9999997473787516E-6 (about 1e-5).
    // %272 / %273 are defined earlier in the function, outside this chunk.
    // Only %result0_1862 is consumed in the visible lines; the two [1,7,1] results
    // (%result1_1863, %result2_1864) are not referenced here.
    %float9.999990e-06_1860 = torch.constant.float 9.9999997473787516E-6
    %int1600_1861 = torch.constant.int 1600
    %2580 = torch.prim.ListConstruct %int1600_1861 : (!torch.int) -> !torch.list<int>
    %result0_1862, %result1_1863, %result2_1864 = torch.aten.native_layer_norm %2579, %2580, %272, %273, %float9.999990e-06_1860 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 on the normalized activations.
    // %result0_1862 is flattened to [7,1600], multiplied by %274 ([1600,6400]), biased
    // with %275 ([6400]) and reshaped to [1,7,6400] (%2588).
    // %274 / %275 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2581 / %2586 and %int0_1867 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2582 / %2587.
    %2581 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1865 = torch.constant.int 7
    %int1600_1866 = torch.constant.int 1600
    %2582 = torch.prim.ListConstruct %int7_1865, %int1600_1866 : (!torch.int, !torch.int) -> !torch.list<int>
    %2583 = torch.aten.reshape %result0_1862, %2582 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1867 = torch.constant.int 0
    %int1_1868 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2584 = torch.aten.mm %2583, %274 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2585 = torch.aten.add.Tensor %2584, %275, %int1_1868 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2586 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1869 = torch.constant.int 1
    %int7_1870 = torch.constant.int 7
    %int6400_1871 = torch.constant.int 6400
    %2587 = torch.prim.ListConstruct %int1_1869, %int7_1870, %int6400_1871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2588 = torch.aten.reshape %2585, %2587 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation applied to x = %2588 ([1,7,6400]). The ops below compute
    //   %2601 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which matches the tanh approximation of GELU.
    // %2590 = 0.5 * x
    %2589 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2590 = torch.aten.mul.Tensor %2588, %2589 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2592 = x^3 (tensor-tensor pow against a 0-d exponent that broadcasts)
    %2591 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2592 = torch.aten.pow.Tensor_Tensor %2588, %2591 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2594 = 0.044715 * x^3
    %2593 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2594 = torch.aten.mul.Tensor %2592, %2593 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2595 = x + 1 * %2594 (the trailing int operand of add.Tensor is the alpha multiplier)
    %int1_1872 = torch.constant.int 1
    %2595 = torch.aten.add.Tensor %2588, %2594, %int1_1872 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %2597 = 0.797884583 * %2595 (the constant is approximately sqrt(2/pi))
    %2596 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2597 = torch.aten.mul.Tensor %2595, %2596 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2598 = torch.aten.tanh %2597 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %2600 = tanh(...) + 1
    %2599 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1873 = torch.constant.int 1
    %2600 = torch.aten.add.Tensor %2598, %2599, %int1_1873 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Final product of the two halves: (0.5 * x) * (1 + tanh(...))
    %2601 = torch.aten.mul.Tensor %2590, %2600 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 followed by a residual add.
    // %2601 is flattened to [7,6400], multiplied by %276 ([6400,1600]), biased with
    // %277 ([1600]), reshaped to [1,7,1600] and added to %2579.
    // %276 / %277 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2602 / %2607 and %int0_1876 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2603 / %2608.
    %2602 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1874 = torch.constant.int 7
    %int6400_1875 = torch.constant.int 6400
    %2603 = torch.prim.ListConstruct %int7_1874, %int6400_1875 : (!torch.int, !torch.int) -> !torch.list<int>
    %2604 = torch.aten.reshape %2601, %2603 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_1876 = torch.constant.int 0
    %int1_1877 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2605 = torch.aten.mm %2604, %276 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2606 = torch.aten.add.Tensor %2605, %277, %int1_1877 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2607 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1878 = torch.constant.int 1
    %int7_1879 = torch.constant.int 7
    %int1600_1880 = torch.constant.int 1600
    %2608 = torch.prim.ListConstruct %int1_1878, %int7_1879, %int1600_1880 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2609 = torch.aten.reshape %2606, %2608 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual connection: %2610 = %2579 + 1 * %2609
    %int1_1881 = torch.constant.int 1
    %2610 = torch.aten.add.Tensor %2579, %2609, %int1_1881 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %2610 with normalized shape [1600] (the last dimension),
    // weight %278, bias %279 and eps 9.9999997473787516E-6 (about 1e-5).
    // %278 / %279 are defined earlier in the function, outside this chunk.
    // Only %result0_1884 is consumed in the visible lines; the two [1,7,1] results
    // (%result1_1885, %result2_1886) are not referenced here.
    %float9.999990e-06_1882 = torch.constant.float 9.9999997473787516E-6
    %int1600_1883 = torch.constant.int 1600
    %2611 = torch.prim.ListConstruct %int1600_1883 : (!torch.int) -> !torch.list<int>
    %result0_1884, %result1_1885, %result2_1886 = torch.aten.native_layer_norm %2610, %2611, %278, %279, %float9.999990e-06_1882 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 4800 on the normalized activations.
    // %result0_1884 is flattened to [7,1600], multiplied by %280 ([1600,4800]), biased
    // with %281 ([4800]) and reshaped to [1,7,4800] (%2619).
    // %280 / %281 are defined earlier in the function, outside this chunk.
    // NOTE(review): the shape literals %2612 / %2617 and %int0_1889 are not referenced
    // in the visible lines; the reshapes take the explicit int lists %2613 / %2618.
    %2612 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1887 = torch.constant.int 7
    %int1600_1888 = torch.constant.int 1600
    %2613 = torch.prim.ListConstruct %int7_1887, %int1600_1888 : (!torch.int, !torch.int) -> !torch.list<int>
    %2614 = torch.aten.reshape %result0_1884, %2613 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_1889 = torch.constant.int 0
    %int1_1890 = torch.constant.int 1
    // Matrix multiply, then bias add with alpha = 1.
    %2615 = torch.aten.mm %2614, %280 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2616 = torch.aten.add.Tensor %2615, %281, %int1_1890 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2617 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1891 = torch.constant.int 1
    %int7_1892 = torch.constant.int 7
    %int4800_1893 = torch.constant.int 4800
    %2618 = torch.prim.ListConstruct %int1_1891, %int7_1892, %int4800_1893 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2619 = torch.aten.reshape %2616, %2618 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %2620 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2621 = torch.prim.tolist(%2620) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1894 = torch.constant.int 2
    %2622 = torch.aten.split_with_sizes %2619, %2621, %int2_1894 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2623:3 = torch.prim.ListUnpack %2622 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three slices of %2623 from [1,7,1600] to [1,7,25,64]
    // (1600 = 25 * 64) and swap dims 1 and 2 to obtain [1,25,7,64].
    // The shape literals %2624/%2628/%2632 are not consumed in the visible lines;
    // each reshape uses the ListConstruct that follows it.

    // Slice #0 -> %2627 (left operand of the score matmul %2638).
    %2624 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1895 = torch.constant.int 1
    %int7_1896 = torch.constant.int 7
    %int25_1897 = torch.constant.int 25
    %int64_1898 = torch.constant.int 64
    %2625 = torch.prim.ListConstruct %int1_1895, %int7_1896, %int25_1897, %int64_1898 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2626 = torch.aten.reshape %2623#0, %2625 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1899 = torch.constant.int 1
    %int2_1900 = torch.constant.int 2
    %2627 = torch.aten.transpose.int %2626, %int1_1899, %int2_1900 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %2631. NOTE(review): %2631 is not used in the visible lines
    // (the score path re-transposes %2630 as %2636); it may be consumed outside this view.
    %2628 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1901 = torch.constant.int 1
    %int7_1902 = torch.constant.int 7
    %int25_1903 = torch.constant.int 25
    %int64_1904 = torch.constant.int 64
    %2629 = torch.prim.ListConstruct %int1_1901, %int7_1902, %int25_1903, %int64_1904 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2630 = torch.aten.reshape %2623#1, %2629 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1905 = torch.constant.int 1
    %int2_1906 = torch.constant.int 2
    %2631 = torch.aten.transpose.int %2630, %int1_1905, %int2_1906 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %2635 (right operand of the post-softmax matmul %2653).
    %2632 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1907 = torch.constant.int 1
    %int7_1908 = torch.constant.int 7
    %int25_1909 = torch.constant.int 25
    %int64_1910 = torch.constant.int 64
    %2633 = torch.prim.ListConstruct %int1_1907, %int7_1908, %int25_1909, %int64_1910 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2634 = torch.aten.reshape %2623#2, %2633 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1911 = torch.constant.int 1
    %int2_1912 = torch.constant.int 2
    %2635 = torch.aten.transpose.int %2634, %int1_1911, %int2_1912 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled, masked softmax over pairwise scores:
    //   %2652 = softmax(where(%2647, (%2627 @ transpose(%2636)) / 64^0.5, -3.40282347E+38), dim=3)
    %int1_1913 = torch.constant.int 1
    %int2_1914 = torch.constant.int 2
    // Same op and operand as %2631 (transpose of %2630 over dims 1,2), emitted a second time.
    %2636 = torch.aten.transpose.int %2630, %int1_1913, %int2_1914 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1915 = torch.constant.int 2
    %int3_1916 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %2637 = torch.aten.transpose.int %2636, %int2_1915, %int3_1916 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %2638 = torch.aten.matmul %2627, %2637 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor computed in-graph: 64.0 ^ 0.5.
    %2639 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2640 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2641 = torch.aten.pow.Tensor_Tensor %2639, %2640 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %2642 (empty shape literal) and %int0_1917 are not referenced in the visible lines.
    %2642 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1917 = torch.constant.int 0
    // Empty size list -> aten.full produces a rank-0 tensor filled with 0.0.
    %2643 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_1918 = torch.constant.none
    %float0.000000e00_1919 = torch.constant.float 0.000000e+00
    %2644 = torch.aten.full %2643, %float0.000000e00_1919, %none_1918, %none_1918, %none_1918, %none_1918 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_1920 = torch.constant.int 1
    // 0.0 + %2641 (alpha = 1): same value as %2641, used as the divisor below.
    %2645 = torch.aten.add.Tensor %2644, %2641, %int1_1920 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2646 = torch.aten.div.Tensor %2638, %2645 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump
    // (presumably a causal mask -- TODO confirm against the un-elided file).
    %2647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_1921 = torch.constant.int 6
    %none_1922 = torch.constant.none
    %false_1923 = torch.constant.bool false
    // to.dtype with dtype code 6; operand and result are both f32, so the tensor type is unchanged.
    %2648 = torch.aten.to.dtype %2646, %int6_1921, %false_1923, %false_1923, %none_1922 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value for masked-out positions: the most negative finite f32.
    %2649 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Keep the score where the mask is true, otherwise use the fill value (mask broadcasts over dim 1).
    %2650 = torch.aten.where.self %2647, %2648, %2649 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_1924 = torch.constant.int 3
    %none_1925 = torch.constant.none
    // Softmax over the last dimension (dim 3), no dtype override.
    %2651 = torch.aten.softmax.int %2650, %int3_1924, %none_1925 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_1926 = torch.constant.int 6
    %none_1927 = torch.constant.none
    %false_1928 = torch.constant.bool false
    // Second f32 -> f32 to.dtype; tensor type unchanged.
    %2652 = torch.aten.to.dtype %2651, %int6_1926, %false_1928, %false_1928, %none_1927 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the softmax weights %2652 to %2635, merge the 25 x 64 axes back to 1600,
    // run a 1600 -> 1600 linear projection and add the residual %2610.
    // [1,25,7,7] x [1,25,7,64] -> [1,25,7,64]
    %2653 = torch.aten.matmul %2652, %2635 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_1929 = torch.constant.int 1
    %int2_1930 = torch.constant.int 2
    // Swap dims 1 and 2 back: [1,25,7,64] -> [1,7,25,64].
    %2654 = torch.aten.transpose.int %2653, %int1_1929, %int2_1930 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2656 instead.
    %2655 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1931 = torch.constant.int 1
    %int7_1932 = torch.constant.int 7
    %int1600_1933 = torch.constant.int 1600
    %2656 = torch.prim.ListConstruct %int1_1931, %int7_1932, %int1600_1933 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing [25,64] into 1600.
    %2657 = torch.aten.reshape %2654, %2656 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2659 ([7, 1600]) instead.
    %2658 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1934 = torch.constant.int 7
    %int1600_1935 = torch.constant.int 1600
    %2659 = torch.prim.ListConstruct %int7_1934, %int1600_1935 : (!torch.int, !torch.int) -> !torch.list<int>
    %2660 = torch.aten.reshape %2657, %2659 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_1936 is not referenced in the visible lines; %int1_1937 is the alpha of the bias add.
    %int0_1936 = torch.constant.int 0
    %int1_1937 = torch.constant.int 1
    // [7,1600] x [1600,1600] weight %282, plus [1600] bias %283.
    %2661 = torch.aten.mm %2660, %282 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2662 = torch.aten.add.Tensor %2661, %283, %int1_1937 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2664 instead.
    %2663 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1938 = torch.constant.int 1
    %int7_1939 = torch.constant.int 7
    %int1600_1940 = torch.constant.int 1600
    %2664 = torch.prim.ListConstruct %int1_1938, %int7_1939, %int1600_1940 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2665 = torch.aten.reshape %2662, %2664 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1941 = torch.constant.int 1
    // Residual add (alpha = 1): %2666 = %2665 + %2610.
    %2666 = torch.aten.add.Tensor %2665, %2610, %int1_1941 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %2666 over the last dimension (1600), then a 1600 -> 6400 linear projection.
    // Epsilon for the layer norm (approximately 1e-5).
    %float9.999990e-06_1942 = torch.constant.float 9.9999997473787516E-6
    %int1600_1943 = torch.constant.int 1600
    // normalized_shape = [1600]
    %2667 = torch.prim.ListConstruct %int1600_1943 : (!torch.int) -> !torch.list<int>
    // Weight %284, bias %285. Only %result0_1944 is consumed in the visible lines.
    %result0_1944, %result1_1945, %result2_1946 = torch.aten.native_layer_norm %2666, %2667, %284, %285, %float9.999990e-06_1942 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2669 ([7, 1600]) instead.
    %2668 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1947 = torch.constant.int 7
    %int1600_1948 = torch.constant.int 1600
    %2669 = torch.prim.ListConstruct %int7_1947, %int1600_1948 : (!torch.int, !torch.int) -> !torch.list<int>
    %2670 = torch.aten.reshape %result0_1944, %2669 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_1949 is not referenced in the visible lines; %int1_1950 is the alpha of the bias add.
    %int0_1949 = torch.constant.int 0
    %int1_1950 = torch.constant.int 1
    // [7,1600] x [1600,6400] weight %286, plus [6400] bias %287.
    %2671 = torch.aten.mm %2670, %286 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2672 = torch.aten.add.Tensor %2671, %287, %int1_1950 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal [1, 7, 6400]; the reshape below consumes the ListConstruct %2674 instead.
    %2673 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1951 = torch.constant.int 1
    %int7_1952 = torch.constant.int 7
    %int6400_1953 = torch.constant.int 6400
    %2674 = torch.prim.ListConstruct %int1_1951, %int7_1952, %int6400_1953 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2675 = torch.aten.reshape %2672, %2674 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %2675, decomposed into primitive ops:
    //   %2688 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This matches the tanh approximation of GELU (0.797884583 ~ sqrt(2/pi)).
    %2676 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %2677 = torch.aten.mul.Tensor %2675, %2676 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2678 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %2679 = torch.aten.pow.Tensor_Tensor %2675, %2678 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2680 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %2681 = torch.aten.mul.Tensor %2679, %2680 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_1954 = torch.constant.int 1
    // x + 0.044715 * x^3
    %2682 = torch.aten.add.Tensor %2675, %2681, %int1_1954 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2683 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2684 = torch.aten.mul.Tensor %2682, %2683 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2685 = torch.aten.tanh %2684 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2686 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_1955 = torch.constant.int 1
    // 1 + tanh(...)
    %2687 = torch.aten.add.Tensor %2685, %2686, %int1_1955 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %2688 = torch.aten.mul.Tensor %2677, %2687 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 6400 -> 1600 linear projection of %2688, followed by a residual add with %2666.
    // Shape literal [-1, 6400]; the reshape below uses the static list %2690 ([7, 6400]) instead.
    %2689 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1956 = torch.constant.int 7
    %int6400_1957 = torch.constant.int 6400
    %2690 = torch.prim.ListConstruct %int7_1956, %int6400_1957 : (!torch.int, !torch.int) -> !torch.list<int>
    %2691 = torch.aten.reshape %2688, %2690 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // NOTE(review): %int0_1958 is not referenced in the visible lines; %int1_1959 is the alpha of the bias add.
    %int0_1958 = torch.constant.int 0
    %int1_1959 = torch.constant.int 1
    // [7,6400] x [6400,1600] weight %288, plus [1600] bias %289.
    %2692 = torch.aten.mm %2691, %288 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2693 = torch.aten.add.Tensor %2692, %289, %int1_1959 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2695 instead.
    %2694 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1960 = torch.constant.int 1
    %int7_1961 = torch.constant.int 7
    %int1600_1962 = torch.constant.int 1600
    %2695 = torch.prim.ListConstruct %int1_1960, %int7_1961, %int1600_1962 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2696 = torch.aten.reshape %2693, %2695 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_1963 = torch.constant.int 1
    // Residual add (alpha = 1): %2697 = %2666 + %2696.
    %2697 = torch.aten.add.Tensor %2666, %2696, %int1_1963 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %2697 over the last dimension (1600), then a 1600 -> 4800
    // linear projection whose output is split into three [1,7,1600] slices.
    // Epsilon for the layer norm (approximately 1e-5).
    %float9.999990e-06_1964 = torch.constant.float 9.9999997473787516E-6
    %int1600_1965 = torch.constant.int 1600
    // normalized_shape = [1600]
    %2698 = torch.prim.ListConstruct %int1600_1965 : (!torch.int) -> !torch.list<int>
    // Weight %290, bias %291. Only %result0_1966 is consumed in the visible lines.
    %result0_1966, %result1_1967, %result2_1968 = torch.aten.native_layer_norm %2697, %2698, %290, %291, %float9.999990e-06_1964 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2700 ([7, 1600]) instead.
    %2699 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_1969 = torch.constant.int 7
    %int1600_1970 = torch.constant.int 1600
    %2700 = torch.prim.ListConstruct %int7_1969, %int1600_1970 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to 2-D for aten.mm: [1,7,1600] -> [7,1600].
    %2701 = torch.aten.reshape %result0_1966, %2700 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_1971 is not referenced in the visible lines; %int1_1972 is the alpha of the bias add.
    %int0_1971 = torch.constant.int 0
    %int1_1972 = torch.constant.int 1
    // [7,1600] x [1600,4800] weight %292, plus [4800] bias %293.
    %2702 = torch.aten.mm %2701, %292 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2703 = torch.aten.add.Tensor %2702, %293, %int1_1972 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal [1, 7, 4800]; the reshape below consumes the ListConstruct %2705 instead.
    %2704 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_1973 = torch.constant.int 1
    %int7_1974 = torch.constant.int 7
    %int4800_1975 = torch.constant.int 4800
    %2705 = torch.prim.ListConstruct %int1_1973, %int7_1974, %int4800_1975 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2706 = torch.aten.reshape %2703, %2705 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes of 1600 each (3 * 1600 = 4800).
    %2707 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2708 = torch.prim.tolist(%2707) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_1976 = torch.constant.int 2
    // Split along dim 2 and unpack into %2710#0, %2710#1, %2710#2.
    %2709 = torch.aten.split_with_sizes %2706, %2708, %int2_1976 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2710:3 = torch.prim.ListUnpack %2709 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three slices of %2710 from [1,7,1600] to [1,7,25,64]
    // (1600 = 25 * 64) and swap dims 1 and 2 to obtain [1,25,7,64].
    // The shape literals %2711/%2715/%2719 are not consumed in the visible lines;
    // each reshape uses the ListConstruct that follows it.

    // Slice #0 -> %2714 (left operand of the score matmul %2725).
    %2711 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1977 = torch.constant.int 1
    %int7_1978 = torch.constant.int 7
    %int25_1979 = torch.constant.int 25
    %int64_1980 = torch.constant.int 64
    %2712 = torch.prim.ListConstruct %int1_1977, %int7_1978, %int25_1979, %int64_1980 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2713 = torch.aten.reshape %2710#0, %2712 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1981 = torch.constant.int 1
    %int2_1982 = torch.constant.int 2
    %2714 = torch.aten.transpose.int %2713, %int1_1981, %int2_1982 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %2718. NOTE(review): %2718 is not used in the visible lines
    // (the score path re-transposes %2717 as %2723); it may be consumed outside this view.
    %2715 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1983 = torch.constant.int 1
    %int7_1984 = torch.constant.int 7
    %int25_1985 = torch.constant.int 25
    %int64_1986 = torch.constant.int 64
    %2716 = torch.prim.ListConstruct %int1_1983, %int7_1984, %int25_1985, %int64_1986 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2717 = torch.aten.reshape %2710#1, %2716 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1987 = torch.constant.int 1
    %int2_1988 = torch.constant.int 2
    %2718 = torch.aten.transpose.int %2717, %int1_1987, %int2_1988 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %2722 (right operand of the post-softmax matmul %2740).
    %2719 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_1989 = torch.constant.int 1
    %int7_1990 = torch.constant.int 7
    %int25_1991 = torch.constant.int 25
    %int64_1992 = torch.constant.int 64
    %2720 = torch.prim.ListConstruct %int1_1989, %int7_1990, %int25_1991, %int64_1992 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2721 = torch.aten.reshape %2710#2, %2720 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_1993 = torch.constant.int 1
    %int2_1994 = torch.constant.int 2
    %2722 = torch.aten.transpose.int %2721, %int1_1993, %int2_1994 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled, masked softmax over pairwise scores:
    //   %2739 = softmax(where(%2734, (%2714 @ transpose(%2723)) / 64^0.5, -3.40282347E+38), dim=3)
    %int1_1995 = torch.constant.int 1
    %int2_1996 = torch.constant.int 2
    // Same op and operand as %2718 (transpose of %2717 over dims 1,2), emitted a second time.
    %2723 = torch.aten.transpose.int %2717, %int1_1995, %int2_1996 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_1997 = torch.constant.int 2
    %int3_1998 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %2724 = torch.aten.transpose.int %2723, %int2_1997, %int3_1998 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %2725 = torch.aten.matmul %2714, %2724 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor computed in-graph: 64.0 ^ 0.5.
    %2726 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2727 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2728 = torch.aten.pow.Tensor_Tensor %2726, %2727 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %2729 (empty shape literal) and %int0_1999 are not referenced in the visible lines.
    %2729 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_1999 = torch.constant.int 0
    // Empty size list -> aten.full produces a rank-0 tensor filled with 0.0.
    %2730 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2000 = torch.constant.none
    %float0.000000e00_2001 = torch.constant.float 0.000000e+00
    %2731 = torch.aten.full %2730, %float0.000000e00_2001, %none_2000, %none_2000, %none_2000, %none_2000 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2002 = torch.constant.int 1
    // 0.0 + %2728 (alpha = 1): same value as %2728, used as the divisor below.
    %2732 = torch.aten.add.Tensor %2731, %2728, %int1_2002 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2733 = torch.aten.div.Tensor %2725, %2732 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump
    // (presumably a causal mask -- TODO confirm against the un-elided file).
    %2734 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2003 = torch.constant.int 6
    %none_2004 = torch.constant.none
    %false_2005 = torch.constant.bool false
    // to.dtype with dtype code 6; operand and result are both f32, so the tensor type is unchanged.
    %2735 = torch.aten.to.dtype %2733, %int6_2003, %false_2005, %false_2005, %none_2004 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value for masked-out positions: the most negative finite f32.
    %2736 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Keep the score where the mask is true, otherwise use the fill value (mask broadcasts over dim 1).
    %2737 = torch.aten.where.self %2734, %2735, %2736 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2006 = torch.constant.int 3
    %none_2007 = torch.constant.none
    // Softmax over the last dimension (dim 3), no dtype override.
    %2738 = torch.aten.softmax.int %2737, %int3_2006, %none_2007 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2008 = torch.constant.int 6
    %none_2009 = torch.constant.none
    %false_2010 = torch.constant.bool false
    // Second f32 -> f32 to.dtype; tensor type unchanged.
    %2739 = torch.aten.to.dtype %2738, %int6_2008, %false_2010, %false_2010, %none_2009 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the softmax weights %2739 to %2722, merge the 25 x 64 axes back to 1600,
    // run a 1600 -> 1600 linear projection and add the residual %2697.
    // [1,25,7,7] x [1,25,7,64] -> [1,25,7,64]
    %2740 = torch.aten.matmul %2739, %2722 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2011 = torch.constant.int 1
    %int2_2012 = torch.constant.int 2
    // Swap dims 1 and 2 back: [1,25,7,64] -> [1,7,25,64].
    %2741 = torch.aten.transpose.int %2740, %int1_2011, %int2_2012 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2743 instead.
    %2742 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2013 = torch.constant.int 1
    %int7_2014 = torch.constant.int 7
    %int1600_2015 = torch.constant.int 1600
    %2743 = torch.prim.ListConstruct %int1_2013, %int7_2014, %int1600_2015 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing [25,64] into 1600.
    %2744 = torch.aten.reshape %2741, %2743 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2746 ([7, 1600]) instead.
    %2745 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2016 = torch.constant.int 7
    %int1600_2017 = torch.constant.int 1600
    %2746 = torch.prim.ListConstruct %int7_2016, %int1600_2017 : (!torch.int, !torch.int) -> !torch.list<int>
    %2747 = torch.aten.reshape %2744, %2746 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_2018 is not referenced in the visible lines; %int1_2019 is the alpha of the bias add.
    %int0_2018 = torch.constant.int 0
    %int1_2019 = torch.constant.int 1
    // [7,1600] x [1600,1600] weight %294, plus [1600] bias %295.
    %2748 = torch.aten.mm %2747, %294 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2749 = torch.aten.add.Tensor %2748, %295, %int1_2019 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2751 instead.
    %2750 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2020 = torch.constant.int 1
    %int7_2021 = torch.constant.int 7
    %int1600_2022 = torch.constant.int 1600
    %2751 = torch.prim.ListConstruct %int1_2020, %int7_2021, %int1600_2022 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2752 = torch.aten.reshape %2749, %2751 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2023 = torch.constant.int 1
    // Residual add (alpha = 1): %2753 = %2752 + %2697.
    %2753 = torch.aten.add.Tensor %2752, %2697, %int1_2023 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %2753 over the last dimension (1600), then a 1600 -> 6400 linear projection.
    // Epsilon for the layer norm (approximately 1e-5).
    %float9.999990e-06_2024 = torch.constant.float 9.9999997473787516E-6
    %int1600_2025 = torch.constant.int 1600
    // normalized_shape = [1600]
    %2754 = torch.prim.ListConstruct %int1600_2025 : (!torch.int) -> !torch.list<int>
    // Weight %296, bias %297. Only %result0_2026 is consumed in the visible lines.
    %result0_2026, %result1_2027, %result2_2028 = torch.aten.native_layer_norm %2753, %2754, %296, %297, %float9.999990e-06_2024 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2756 ([7, 1600]) instead.
    %2755 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2029 = torch.constant.int 7
    %int1600_2030 = torch.constant.int 1600
    %2756 = torch.prim.ListConstruct %int7_2029, %int1600_2030 : (!torch.int, !torch.int) -> !torch.list<int>
    %2757 = torch.aten.reshape %result0_2026, %2756 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_2031 is not referenced in the visible lines; %int1_2032 is the alpha of the bias add.
    %int0_2031 = torch.constant.int 0
    %int1_2032 = torch.constant.int 1
    // [7,1600] x [1600,6400] weight %298, plus [6400] bias %299.
    %2758 = torch.aten.mm %2757, %298 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2759 = torch.aten.add.Tensor %2758, %299, %int1_2032 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal [1, 7, 6400]; the reshape below consumes the ListConstruct %2761 instead.
    %2760 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2033 = torch.constant.int 1
    %int7_2034 = torch.constant.int 7
    %int6400_2035 = torch.constant.int 6400
    %2761 = torch.prim.ListConstruct %int1_2033, %int7_2034, %int6400_2035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2762 = torch.aten.reshape %2759, %2761 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %2762, decomposed into primitive ops:
    //   %2775 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This matches the tanh approximation of GELU (0.797884583 ~ sqrt(2/pi)).
    %2763 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %2764 = torch.aten.mul.Tensor %2762, %2763 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2765 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %2766 = torch.aten.pow.Tensor_Tensor %2762, %2765 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2767 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %2768 = torch.aten.mul.Tensor %2766, %2767 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2036 = torch.constant.int 1
    // x + 0.044715 * x^3
    %2769 = torch.aten.add.Tensor %2762, %2768, %int1_2036 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2770 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2771 = torch.aten.mul.Tensor %2769, %2770 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2772 = torch.aten.tanh %2771 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2773 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2037 = torch.constant.int 1
    // 1 + tanh(...)
    %2774 = torch.aten.add.Tensor %2772, %2773, %int1_2037 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %2775 = torch.aten.mul.Tensor %2764, %2774 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 6400 -> 1600 linear projection of %2775, followed by a residual add with %2753.
    // Shape literal [-1, 6400]; the reshape below uses the static list %2777 ([7, 6400]) instead.
    %2776 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2038 = torch.constant.int 7
    %int6400_2039 = torch.constant.int 6400
    %2777 = torch.prim.ListConstruct %int7_2038, %int6400_2039 : (!torch.int, !torch.int) -> !torch.list<int>
    %2778 = torch.aten.reshape %2775, %2777 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // NOTE(review): %int0_2040 is not referenced in the visible lines; %int1_2041 is the alpha of the bias add.
    %int0_2040 = torch.constant.int 0
    %int1_2041 = torch.constant.int 1
    // [7,6400] x [6400,1600] weight %300, plus [1600] bias %301.
    %2779 = torch.aten.mm %2778, %300 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2780 = torch.aten.add.Tensor %2779, %301, %int1_2041 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; the reshape below consumes the ListConstruct %2782 instead.
    %2781 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2042 = torch.constant.int 1
    %int7_2043 = torch.constant.int 7
    %int1600_2044 = torch.constant.int 1600
    %2782 = torch.prim.ListConstruct %int1_2042, %int7_2043, %int1600_2044 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2783 = torch.aten.reshape %2780, %2782 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2045 = torch.constant.int 1
    // Residual add (alpha = 1): %2784 = %2753 + %2783.
    %2784 = torch.aten.add.Tensor %2753, %2783, %int1_2045 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %2784 over the last dimension (1600), then a 1600 -> 4800
    // linear projection whose output is split into three [1,7,1600] slices.
    // Epsilon for the layer norm (approximately 1e-5).
    %float9.999990e-06_2046 = torch.constant.float 9.9999997473787516E-6
    %int1600_2047 = torch.constant.int 1600
    // normalized_shape = [1600]
    %2785 = torch.prim.ListConstruct %int1600_2047 : (!torch.int) -> !torch.list<int>
    // Weight %302, bias %303. Only %result0_2048 is consumed in the visible lines.
    %result0_2048, %result1_2049, %result2_2050 = torch.aten.native_layer_norm %2784, %2785, %302, %303, %float9.999990e-06_2046 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; the reshape below uses the static list %2787 ([7, 1600]) instead.
    %2786 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2051 = torch.constant.int 7
    %int1600_2052 = torch.constant.int 1600
    %2787 = torch.prim.ListConstruct %int7_2051, %int1600_2052 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to 2-D for aten.mm: [1,7,1600] -> [7,1600].
    %2788 = torch.aten.reshape %result0_2048, %2787 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // NOTE(review): %int0_2053 is not referenced in the visible lines; %int1_2054 is the alpha of the bias add.
    %int0_2053 = torch.constant.int 0
    %int1_2054 = torch.constant.int 1
    // [7,1600] x [1600,4800] weight %304, plus [4800] bias %305.
    %2789 = torch.aten.mm %2788, %304 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2790 = torch.aten.add.Tensor %2789, %305, %int1_2054 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal [1, 7, 4800]; the reshape below consumes the ListConstruct %2792 instead.
    %2791 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2055 = torch.constant.int 1
    %int7_2056 = torch.constant.int 7
    %int4800_2057 = torch.constant.int 4800
    %2792 = torch.prim.ListConstruct %int1_2055, %int7_2056, %int4800_2057 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2793 = torch.aten.reshape %2790, %2792 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes of 1600 each (3 * 1600 = 4800).
    %2794 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2795 = torch.prim.tolist(%2794) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2058 = torch.constant.int 2
    // Split along dim 2 and unpack into %2797#0, %2797#1, %2797#2.
    %2796 = torch.aten.split_with_sizes %2793, %2795, %int2_2058 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2797:3 = torch.prim.ListUnpack %2796 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three slices of %2797 from [1,7,1600] to [1,7,25,64]
    // (1600 = 25 * 64); slices #0 and #1 are also transposed to [1,25,7,64] here.
    // The shape literals %2798/%2802/%2806 are not consumed in the visible lines;
    // each reshape uses the ListConstruct that follows it.

    // Slice #0 -> %2801.
    %2798 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2059 = torch.constant.int 1
    %int7_2060 = torch.constant.int 7
    %int25_2061 = torch.constant.int 25
    %int64_2062 = torch.constant.int 64
    %2799 = torch.prim.ListConstruct %int1_2059, %int7_2060, %int25_2061, %int64_2062 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2800 = torch.aten.reshape %2797#0, %2799 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2063 = torch.constant.int 1
    %int2_2064 = torch.constant.int 2
    %2801 = torch.aten.transpose.int %2800, %int1_2063, %int2_2064 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1 -> %2805.
    %2802 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2065 = torch.constant.int 1
    %int7_2066 = torch.constant.int 7
    %int25_2067 = torch.constant.int 25
    %int64_2068 = torch.constant.int 64
    %2803 = torch.prim.ListConstruct %int1_2065, %int7_2066, %int25_2067, %int64_2068 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2804 = torch.aten.reshape %2797#1, %2803 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2069 = torch.constant.int 1
    %int2_2070 = torch.constant.int 2
    %2805 = torch.aten.transpose.int %2804, %int1_2069, %int2_2070 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2 -> %2808 (reshape only within the visible lines).
    %2806 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2071 = torch.constant.int 1
    %int7_2072 = torch.constant.int 7
    %int25_2073 = torch.constant.int 25
    %int64_2074 = torch.constant.int 64
    %2807 = torch.prim.ListConstruct %int1_2071, %int7_2072, %int25_2073, %int64_2074 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2808 = torch.aten.reshape %2797#2, %2807 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    // NOTE(review): the consumer of these two constants lies beyond the visible lines
    // (presumably a transpose of %2808 over dims 1 and 2 -- confirm in the following lines).
    %int1_2075 = torch.constant.int 1
    %int2_2076 = torch.constant.int 2
    %2809 = torch.aten.transpose.int %2808, %int1_2075, %int2_2076 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2077 = torch.constant.int 1
    %int2_2078 = torch.constant.int 2
    %2810 = torch.aten.transpose.int %2804, %int1_2077, %int2_2078 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2079 = torch.constant.int 2
    %int3_2080 = torch.constant.int 3
    %2811 = torch.aten.transpose.int %2810, %int2_2079, %int3_2080 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2812 = torch.aten.matmul %2801, %2811 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2813 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2814 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2815 = torch.aten.pow.Tensor_Tensor %2813, %2814 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2816 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2081 = torch.constant.int 0
    %2817 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2082 = torch.constant.none
    %float0.000000e00_2083 = torch.constant.float 0.000000e+00
    %2818 = torch.aten.full %2817, %float0.000000e00_2083, %none_2082, %none_2082, %none_2082, %none_2082 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2084 = torch.constant.int 1
    %2819 = torch.aten.add.Tensor %2818, %2815, %int1_2084 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2820 = torch.aten.div.Tensor %2812, %2819 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2821 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2085 = torch.constant.int 6
    %none_2086 = torch.constant.none
    %false_2087 = torch.constant.bool false
    %2822 = torch.aten.to.dtype %2820, %int6_2085, %false_2087, %false_2087, %none_2086 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2823 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2824 = torch.aten.where.self %2821, %2822, %2823 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2088 = torch.constant.int 3
    %none_2089 = torch.constant.none
    %2825 = torch.aten.softmax.int %2824, %int3_2088, %none_2089 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2090 = torch.constant.int 6
    %none_2091 = torch.constant.none
    %false_2092 = torch.constant.bool false
    %2826 = torch.aten.to.dtype %2825, %int6_2090, %false_2092, %false_2092, %none_2091 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2827 = torch.aten.matmul %2826, %2809 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2093 = torch.constant.int 1
    %int2_2094 = torch.constant.int 2
    %2828 = torch.aten.transpose.int %2827, %int1_2093, %int2_2094 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2829 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2095 = torch.constant.int 1
    %int7_2096 = torch.constant.int 7
    %int1600_2097 = torch.constant.int 1600
    %2830 = torch.prim.ListConstruct %int1_2095, %int7_2096, %int1600_2097 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2831 = torch.aten.reshape %2828, %2830 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2832 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2098 = torch.constant.int 7
    %int1600_2099 = torch.constant.int 1600
    %2833 = torch.prim.ListConstruct %int7_2098, %int1600_2099 : (!torch.int, !torch.int) -> !torch.list<int>
    %2834 = torch.aten.reshape %2831, %2833 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2100 = torch.constant.int 0
    %int1_2101 = torch.constant.int 1
    %2835 = torch.aten.mm %2834, %306 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2836 = torch.aten.add.Tensor %2835, %307, %int1_2101 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2837 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2102 = torch.constant.int 1
    %int7_2103 = torch.constant.int 7
    %int1600_2104 = torch.constant.int 1600
    %2838 = torch.prim.ListConstruct %int1_2102, %int7_2103, %int1600_2104 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2839 = torch.aten.reshape %2836, %2838 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2105 = torch.constant.int 1
    %2840 = torch.aten.add.Tensor %2839, %2784, %int1_2105 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_2106 = torch.constant.float 9.9999997473787516E-6
    %int1600_2107 = torch.constant.int 1600
    %2841 = torch.prim.ListConstruct %int1600_2107 : (!torch.int) -> !torch.list<int>
    %result0_2108, %result1_2109, %result2_2110 = torch.aten.native_layer_norm %2840, %2841, %308, %309, %float9.999990e-06_2106 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2842 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2111 = torch.constant.int 7
    %int1600_2112 = torch.constant.int 1600
    %2843 = torch.prim.ListConstruct %int7_2111, %int1600_2112 : (!torch.int, !torch.int) -> !torch.list<int>
    %2844 = torch.aten.reshape %result0_2108, %2843 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2113 = torch.constant.int 0
    %int1_2114 = torch.constant.int 1
    %2845 = torch.aten.mm %2844, %310 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2846 = torch.aten.add.Tensor %2845, %311, %int1_2114 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2847 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2115 = torch.constant.int 1
    %int7_2116 = torch.constant.int 7
    %int6400_2117 = torch.constant.int 6400
    %2848 = torch.prim.ListConstruct %int1_2115, %int7_2116, %int6400_2117 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2849 = torch.aten.reshape %2846, %2848 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %2850 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2851 = torch.aten.mul.Tensor %2849, %2850 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2852 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2853 = torch.aten.pow.Tensor_Tensor %2849, %2852 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2854 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2855 = torch.aten.mul.Tensor %2853, %2854 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2118 = torch.constant.int 1
    %2856 = torch.aten.add.Tensor %2849, %2855, %int1_2118 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2857 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2858 = torch.aten.mul.Tensor %2856, %2857 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2859 = torch.aten.tanh %2858 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2860 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2119 = torch.constant.int 1
    %2861 = torch.aten.add.Tensor %2859, %2860, %int1_2119 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2862 = torch.aten.mul.Tensor %2851, %2861 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2863 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2120 = torch.constant.int 7
    %int6400_2121 = torch.constant.int 6400
    %2864 = torch.prim.ListConstruct %int7_2120, %int6400_2121 : (!torch.int, !torch.int) -> !torch.list<int>
    %2865 = torch.aten.reshape %2862, %2864 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2122 = torch.constant.int 0
    %int1_2123 = torch.constant.int 1
    %2866 = torch.aten.mm %2865, %312 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2867 = torch.aten.add.Tensor %2866, %313, %int1_2123 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2868 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2124 = torch.constant.int 1
    %int7_2125 = torch.constant.int 7
    %int1600_2126 = torch.constant.int 1600
    %2869 = torch.prim.ListConstruct %int1_2124, %int7_2125, %int1600_2126 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2870 = torch.aten.reshape %2867, %2869 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2127 = torch.constant.int 1
    %2871 = torch.aten.add.Tensor %2840, %2870, %int1_2127 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_2128 = torch.constant.float 9.9999997473787516E-6
    %int1600_2129 = torch.constant.int 1600
    %2872 = torch.prim.ListConstruct %int1600_2129 : (!torch.int) -> !torch.list<int>
    %result0_2130, %result1_2131, %result2_2132 = torch.aten.native_layer_norm %2871, %2872, %314, %315, %float9.999990e-06_2128 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2873 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2133 = torch.constant.int 7
    %int1600_2134 = torch.constant.int 1600
    %2874 = torch.prim.ListConstruct %int7_2133, %int1600_2134 : (!torch.int, !torch.int) -> !torch.list<int>
    %2875 = torch.aten.reshape %result0_2130, %2874 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2135 = torch.constant.int 0
    %int1_2136 = torch.constant.int 1
    %2876 = torch.aten.mm %2875, %316 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2877 = torch.aten.add.Tensor %2876, %317, %int1_2136 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2878 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2137 = torch.constant.int 1
    %int7_2138 = torch.constant.int 7
    %int4800_2139 = torch.constant.int 4800
    %2879 = torch.prim.ListConstruct %int1_2137, %int7_2138, %int4800_2139 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2880 = torch.aten.reshape %2877, %2879 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %2881 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2882 = torch.prim.tolist(%2881) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2140 = torch.constant.int 2
    %2883 = torch.aten.split_with_sizes %2880, %2882, %int2_2140 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2884:3 = torch.prim.ListUnpack %2883 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %2885 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2141 = torch.constant.int 1
    %int7_2142 = torch.constant.int 7
    %int25_2143 = torch.constant.int 25
    %int64_2144 = torch.constant.int 64
    %2886 = torch.prim.ListConstruct %int1_2141, %int7_2142, %int25_2143, %int64_2144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2887 = torch.aten.reshape %2884#0, %2886 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2145 = torch.constant.int 1
    %int2_2146 = torch.constant.int 2
    %2888 = torch.aten.transpose.int %2887, %int1_2145, %int2_2146 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2889 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2147 = torch.constant.int 1
    %int7_2148 = torch.constant.int 7
    %int25_2149 = torch.constant.int 25
    %int64_2150 = torch.constant.int 64
    %2890 = torch.prim.ListConstruct %int1_2147, %int7_2148, %int25_2149, %int64_2150 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2891 = torch.aten.reshape %2884#1, %2890 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2151 = torch.constant.int 1
    %int2_2152 = torch.constant.int 2
    %2892 = torch.aten.transpose.int %2891, %int1_2151, %int2_2152 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2893 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2153 = torch.constant.int 1
    %int7_2154 = torch.constant.int 7
    %int25_2155 = torch.constant.int 25
    %int64_2156 = torch.constant.int 64
    %2894 = torch.prim.ListConstruct %int1_2153, %int7_2154, %int25_2155, %int64_2156 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2895 = torch.aten.reshape %2884#2, %2894 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2157 = torch.constant.int 1
    %int2_2158 = torch.constant.int 2
    %2896 = torch.aten.transpose.int %2895, %int1_2157, %int2_2158 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2159 = torch.constant.int 1
    %int2_2160 = torch.constant.int 2
    %2897 = torch.aten.transpose.int %2891, %int1_2159, %int2_2160 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2161 = torch.constant.int 2
    %int3_2162 = torch.constant.int 3
    %2898 = torch.aten.transpose.int %2897, %int2_2161, %int3_2162 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2899 = torch.aten.matmul %2888, %2898 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2900 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2901 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2902 = torch.aten.pow.Tensor_Tensor %2900, %2901 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %2903 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2163 = torch.constant.int 0
    %2904 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2164 = torch.constant.none
    %float0.000000e00_2165 = torch.constant.float 0.000000e+00
    %2905 = torch.aten.full %2904, %float0.000000e00_2165, %none_2164, %none_2164, %none_2164, %none_2164 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2166 = torch.constant.int 1
    %2906 = torch.aten.add.Tensor %2905, %2902, %int1_2166 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2907 = torch.aten.div.Tensor %2899, %2906 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %2908 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2167 = torch.constant.int 6
    %none_2168 = torch.constant.none
    %false_2169 = torch.constant.bool false
    %2909 = torch.aten.to.dtype %2907, %int6_2167, %false_2169, %false_2169, %none_2168 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2910 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2911 = torch.aten.where.self %2908, %2909, %2910 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2170 = torch.constant.int 3
    %none_2171 = torch.constant.none
    %2912 = torch.aten.softmax.int %2911, %int3_2170, %none_2171 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2172 = torch.constant.int 6
    %none_2173 = torch.constant.none
    %false_2174 = torch.constant.bool false
    %2913 = torch.aten.to.dtype %2912, %int6_2172, %false_2174, %false_2174, %none_2173 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %2914 = torch.aten.matmul %2913, %2896 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2175 = torch.constant.int 1
    %int2_2176 = torch.constant.int 2
    %2915 = torch.aten.transpose.int %2914, %int1_2175, %int2_2176 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %2916 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2177 = torch.constant.int 1
    %int7_2178 = torch.constant.int 7
    %int1600_2179 = torch.constant.int 1600
    %2917 = torch.prim.ListConstruct %int1_2177, %int7_2178, %int1600_2179 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2918 = torch.aten.reshape %2915, %2917 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %2919 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2180 = torch.constant.int 7
    %int1600_2181 = torch.constant.int 1600
    %2920 = torch.prim.ListConstruct %int7_2180, %int1600_2181 : (!torch.int, !torch.int) -> !torch.list<int>
    %2921 = torch.aten.reshape %2918, %2920 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2182 = torch.constant.int 0
    %int1_2183 = torch.constant.int 1
    %2922 = torch.aten.mm %2921, %318 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2923 = torch.aten.add.Tensor %2922, %319, %int1_2183 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2924 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2184 = torch.constant.int 1
    %int7_2185 = torch.constant.int 7
    %int1600_2186 = torch.constant.int 1600
    %2925 = torch.prim.ListConstruct %int1_2184, %int7_2185, %int1600_2186 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2926 = torch.aten.reshape %2923, %2925 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2187 = torch.constant.int 1
    %2927 = torch.aten.add.Tensor %2926, %2871, %int1_2187 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_2188 = torch.constant.float 9.9999997473787516E-6
    %int1600_2189 = torch.constant.int 1600
    %2928 = torch.prim.ListConstruct %int1600_2189 : (!torch.int) -> !torch.list<int>
    %result0_2190, %result1_2191, %result2_2192 = torch.aten.native_layer_norm %2927, %2928, %320, %321, %float9.999990e-06_2188 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2929 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2193 = torch.constant.int 7
    %int1600_2194 = torch.constant.int 1600
    %2930 = torch.prim.ListConstruct %int7_2193, %int1600_2194 : (!torch.int, !torch.int) -> !torch.list<int>
    %2931 = torch.aten.reshape %result0_2190, %2930 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2195 = torch.constant.int 0
    %int1_2196 = torch.constant.int 1
    %2932 = torch.aten.mm %2931, %322 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %2933 = torch.aten.add.Tensor %2932, %323, %int1_2196 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %2934 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2197 = torch.constant.int 1
    %int7_2198 = torch.constant.int 7
    %int6400_2199 = torch.constant.int 6400
    %2935 = torch.prim.ListConstruct %int1_2197, %int7_2198, %int6400_2199 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2936 = torch.aten.reshape %2933, %2935 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %2937 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2938 = torch.aten.mul.Tensor %2936, %2937 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2939 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %2940 = torch.aten.pow.Tensor_Tensor %2936, %2939 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2941 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %2942 = torch.aten.mul.Tensor %2940, %2941 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2200 = torch.constant.int 1
    %2943 = torch.aten.add.Tensor %2936, %2942, %int1_2200 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2944 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %2945 = torch.aten.mul.Tensor %2943, %2944 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2946 = torch.aten.tanh %2945 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2947 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2201 = torch.constant.int 1
    %2948 = torch.aten.add.Tensor %2946, %2947, %int1_2201 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %2949 = torch.aten.mul.Tensor %2938, %2948 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %2950 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2202 = torch.constant.int 7
    %int6400_2203 = torch.constant.int 6400
    %2951 = torch.prim.ListConstruct %int7_2202, %int6400_2203 : (!torch.int, !torch.int) -> !torch.list<int>
    %2952 = torch.aten.reshape %2949, %2951 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2204 = torch.constant.int 0
    %int1_2205 = torch.constant.int 1
    %2953 = torch.aten.mm %2952, %324 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %2954 = torch.aten.add.Tensor %2953, %325, %int1_2205 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %2955 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2206 = torch.constant.int 1
    %int7_2207 = torch.constant.int 7
    %int1600_2208 = torch.constant.int 1600
    %2956 = torch.prim.ListConstruct %int1_2206, %int7_2207, %int1600_2208 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2957 = torch.aten.reshape %2954, %2956 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2209 = torch.constant.int 1
    %2958 = torch.aten.add.Tensor %2927, %2957, %int1_2209 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_2210 = torch.constant.float 9.9999997473787516E-6
    %int1600_2211 = torch.constant.int 1600
    %2959 = torch.prim.ListConstruct %int1600_2211 : (!torch.int) -> !torch.list<int>
    %result0_2212, %result1_2213, %result2_2214 = torch.aten.native_layer_norm %2958, %2959, %326, %327, %float9.999990e-06_2210 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %2960 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2215 = torch.constant.int 7
    %int1600_2216 = torch.constant.int 1600
    %2961 = torch.prim.ListConstruct %int7_2215, %int1600_2216 : (!torch.int, !torch.int) -> !torch.list<int>
    %2962 = torch.aten.reshape %result0_2212, %2961 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2217 = torch.constant.int 0
    %int1_2218 = torch.constant.int 1
    %2963 = torch.aten.mm %2962, %328 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %2964 = torch.aten.add.Tensor %2963, %329, %int1_2218 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %2965 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2219 = torch.constant.int 1
    %int7_2220 = torch.constant.int 7
    %int4800_2221 = torch.constant.int 4800
    %2966 = torch.prim.ListConstruct %int1_2219, %int7_2220, %int4800_2221 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2967 = torch.aten.reshape %2964, %2966 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    %2968 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %2969 = torch.prim.tolist(%2968) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2222 = torch.constant.int 2
    %2970 = torch.aten.split_with_sizes %2967, %2969, %int2_2222 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %2971:3 = torch.prim.ListUnpack %2970 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    %2972 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2223 = torch.constant.int 1
    %int7_2224 = torch.constant.int 7
    %int25_2225 = torch.constant.int 25
    %int64_2226 = torch.constant.int 64
    %2973 = torch.prim.ListConstruct %int1_2223, %int7_2224, %int25_2225, %int64_2226 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2974 = torch.aten.reshape %2971#0, %2973 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2227 = torch.constant.int 1
    %int2_2228 = torch.constant.int 2
    %2975 = torch.aten.transpose.int %2974, %int1_2227, %int2_2228 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %2976 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2229 = torch.constant.int 1
    %int7_2230 = torch.constant.int 7
    %int25_2231 = torch.constant.int 25
    %int64_2232 = torch.constant.int 64
    %2977 = torch.prim.ListConstruct %int1_2229, %int7_2230, %int25_2231, %int64_2232 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2978 = torch.aten.reshape %2971#1, %2977 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2233 = torch.constant.int 1
    %int2_2234 = torch.constant.int 2
    %2979 = torch.aten.transpose.int %2978, %int1_2233, %int2_2234 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Head split of the third slice of %2971: reshape [1,7,1600] -> [1,7,25,64],
    // then swap dims 1 and 2 to obtain [1,25,7,64].
    // The shape literal %2980 is not referenced within this excerpt.
    // %2983 is the right operand of the matmul %3001 that follows the softmax.
    // NOTE(review): presumably the attention value tensor — confirm.
    %2980 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2235 = torch.constant.int 1
    %int7_2236 = torch.constant.int 7
    %int25_2237 = torch.constant.int 25
    %int64_2238 = torch.constant.int 64
    %2981 = torch.prim.ListConstruct %int1_2235, %int7_2236, %int25_2237, %int64_2238 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2982 = torch.aten.reshape %2971#2, %2981 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2239 = torch.constant.int 1
    %int2_2240 = torch.constant.int 2
    %2983 = torch.aten.transpose.int %2982, %int1_2239, %int2_2240 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matmul. %2984 repeats the dims-1/2 transpose of %2978 (same operands
    // as %2979); %2985 then swaps dims 2 and 3 to give [1,25,64,7], so that
    // %2975 [1,25,7,64] x %2985 [1,25,64,7] -> %2986 [1,25,7,7].
    %int1_2241 = torch.constant.int 1
    %int2_2242 = torch.constant.int 2
    %2984 = torch.aten.transpose.int %2978, %int1_2241, %int2_2242 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2243 = torch.constant.int 2
    %int3_2244 = torch.constant.int 3
    %2985 = torch.aten.transpose.int %2984, %int2_2243, %int3_2244 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %2986 = torch.aten.matmul %2975, %2985 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64.0 raised to the power 0.5 (64 is the size of the last dim
    // of the per-head tensors above).
    %2987 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2988 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %2989 = torch.aten.pow.Tensor_Tensor %2987, %2988 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %2990 and %int0_2245 are not referenced within this excerpt.
    %2990 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2245 = torch.constant.int 0
    // A 0-d tensor filled with 0.0 is added to the scale factor (third operand 1),
    // which leaves its value unchanged; the scores are then divided by the result.
    %2991 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2246 = torch.constant.none
    %float0.000000e00_2247 = torch.constant.float 0.000000e+00
    %2992 = torch.aten.full %2991, %float0.000000e00_2247, %none_2246, %none_2246, %none_2246, %none_2246 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2248 = torch.constant.int 1
    %2993 = torch.aten.add.Tensor %2992, %2989, %int1_2248 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %2994 = torch.aten.div.Tensor %2986, %2993 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Masking and softmax over the scaled scores %2994.
    // %2995 is a [1,1,7,7] boolean mask whose contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — confirm
    // against the unelided file.
    %2995 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // This to.dtype takes f32 in and yields f32 out, so the element type is unchanged.
    %int6_2249 = torch.constant.int 6
    %none_2250 = torch.constant.none
    %false_2251 = torch.constant.bool false
    %2996 = torch.aten.to.dtype %2994, %int6_2249, %false_2251, %false_2251, %none_2250 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise use -3.40282347E+38
    // (the most negative finite f32); the mask broadcasts over dim 1.
    %2997 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %2998 = torch.aten.where.self %2995, %2996, %2997 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last dim), followed by another f32 -> f32 to.dtype.
    %int3_2252 = torch.constant.int 3
    %none_2253 = torch.constant.none
    %2999 = torch.aten.softmax.int %2998, %int3_2252, %none_2253 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2254 = torch.constant.int 6
    %none_2255 = torch.constant.none
    %false_2256 = torch.constant.bool false
    %3000 = torch.aten.to.dtype %2999, %int6_2254, %false_2256, %false_2256, %none_2255 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum and head merge: %3000 [1,25,7,7] x %2983 [1,25,7,64] ->
    // [1,25,7,64]; swap dims 1 and 2 back to [1,7,25,64]; reshape to [1,7,1600].
    // The shape literal %3003 is not referenced within this excerpt; the reshape
    // uses the ListConstruct %3004.
    %3001 = torch.aten.matmul %3000, %2983 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2257 = torch.constant.int 1
    %int2_2258 = torch.constant.int 2
    %3002 = torch.aten.transpose.int %3001, %int1_2257, %int2_2258 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3003 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2259 = torch.constant.int 1
    %int7_2260 = torch.constant.int 7
    %int1600_2261 = torch.constant.int 1600
    %3004 = torch.prim.ListConstruct %int1_2259, %int7_2260, %int1600_2261 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3005 = torch.aten.reshape %3002, %3004 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection followed by an elementwise add.
    // Flatten %3005 to [7,1600], multiply by %330 [1600,1600], add %331 [1600]
    // (both defined outside this excerpt), then reshape back to [1,7,1600].
    // %3006 (literal [-1, 1600]), %int0_2264 and %3011 are not referenced within
    // this excerpt; the reshapes use explicit sizes from the ListConstructs.
    %3006 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2262 = torch.constant.int 7
    %int1600_2263 = torch.constant.int 1600
    %3007 = torch.prim.ListConstruct %int7_2262, %int1600_2263 : (!torch.int, !torch.int) -> !torch.list<int>
    %3008 = torch.aten.reshape %3005, %3007 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2264 = torch.constant.int 0
    %int1_2265 = torch.constant.int 1
    %3009 = torch.aten.mm %3008, %330 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3010 = torch.aten.add.Tensor %3009, %331, %int1_2265 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3011 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2266 = torch.constant.int 1
    %int7_2267 = torch.constant.int 7
    %int1600_2268 = torch.constant.int 1600
    %3012 = torch.prim.ListConstruct %int1_2266, %int7_2267, %int1600_2268 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3013 = torch.aten.reshape %3010, %3012 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add %2958 (defined outside this excerpt).
    // NOTE(review): presumably the residual connection around attention — confirm.
    %int1_2269 = torch.constant.int 1
    %3014 = torch.aten.add.Tensor %3013, %2958, %int1_2269 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3014 over the last dim (size 1600) with operands %332 and
    // %333 ([1600] each, defined outside this excerpt) and eps ~1e-5.
    // Only %result0_2272 is used below; the two [1,7,1] results are not
    // referenced within this excerpt.
    %float9.999990e-06_2270 = torch.constant.float 9.9999997473787516E-6
    %int1600_2271 = torch.constant.int 1600
    %3015 = torch.prim.ListConstruct %int1600_2271 : (!torch.int) -> !torch.list<int>
    %result0_2272, %result1_2273, %result2_2274 = torch.aten.native_layer_norm %3014, %3015, %332, %333, %float9.999990e-06_2270 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear 1600 -> 6400: flatten %result0_2272 to [7,1600], multiply by %334
    // [1600,6400], add %335 [6400] (both defined outside this excerpt), then
    // reshape to [1,7,6400].
    // %3016, %int0_2277 and %3021 are not referenced within this excerpt.
    %3016 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2275 = torch.constant.int 7
    %int1600_2276 = torch.constant.int 1600
    %3017 = torch.prim.ListConstruct %int7_2275, %int1600_2276 : (!torch.int, !torch.int) -> !torch.list<int>
    %3018 = torch.aten.reshape %result0_2272, %3017 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2277 = torch.constant.int 0
    %int1_2278 = torch.constant.int 1
    %3019 = torch.aten.mm %3018, %334 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3020 = torch.aten.add.Tensor %3019, %335, %int1_2278 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3021 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2279 = torch.constant.int 1
    %int7_2280 = torch.constant.int 7
    %int6400_2281 = torch.constant.int 6400
    %3022 = torch.prim.ListConstruct %int1_2279, %int7_2280, %int6400_2281 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3023 = torch.aten.reshape %3020, %3022 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %3023, spelled out op by op:
    //   %3036 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh form of the GELU approximation.
    %3024 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3025 = torch.aten.mul.Tensor %3023, %3024 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3026 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3027 = torch.aten.pow.Tensor_Tensor %3023, %3026 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3028 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3029 = torch.aten.mul.Tensor %3027, %3028 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2282 = torch.constant.int 1
    %3030 = torch.aten.add.Tensor %3023, %3029, %int1_2282 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3031 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3032 = torch.aten.mul.Tensor %3030, %3031 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3033 = torch.aten.tanh %3032 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3034 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2283 = torch.constant.int 1
    %3035 = torch.aten.add.Tensor %3033, %3034, %int1_2283 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3036 = torch.aten.mul.Tensor %3025, %3035 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear 6400 -> 1600 followed by an elementwise add.
    // Flatten %3036 to [7,6400], multiply by %336 [6400,1600], add %337 [1600]
    // (both defined outside this excerpt), then reshape to [1,7,1600].
    // %3037, %int0_2286 and %3042 are not referenced within this excerpt.
    %3037 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2284 = torch.constant.int 7
    %int6400_2285 = torch.constant.int 6400
    %3038 = torch.prim.ListConstruct %int7_2284, %int6400_2285 : (!torch.int, !torch.int) -> !torch.list<int>
    %3039 = torch.aten.reshape %3036, %3038 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2286 = torch.constant.int 0
    %int1_2287 = torch.constant.int 1
    %3040 = torch.aten.mm %3039, %336 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3041 = torch.aten.add.Tensor %3040, %337, %int1_2287 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3042 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2288 = torch.constant.int 1
    %int7_2289 = torch.constant.int 7
    %int1600_2290 = torch.constant.int 1600
    %3043 = torch.prim.ListConstruct %int1_2288, %int7_2289, %int1600_2290 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3044 = torch.aten.reshape %3041, %3043 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add %3014, the tensor that was fed into the preceding layer norm.
    // NOTE(review): presumably the residual connection around the MLP — confirm.
    %int1_2291 = torch.constant.int 1
    %3045 = torch.aten.add.Tensor %3014, %3044, %int1_2291 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3045 over the last dim (size 1600) with operands %338 and
    // %339 ([1600] each, defined outside this excerpt) and eps ~1e-5.
    // Only %result0_2294 is used below; the two [1,7,1] results are not
    // referenced within this excerpt.
    %float9.999990e-06_2292 = torch.constant.float 9.9999997473787516E-6
    %int1600_2293 = torch.constant.int 1600
    %3046 = torch.prim.ListConstruct %int1600_2293 : (!torch.int) -> !torch.list<int>
    %result0_2294, %result1_2295, %result2_2296 = torch.aten.native_layer_norm %3045, %3046, %338, %339, %float9.999990e-06_2292 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear 1600 -> 4800 and three-way split.
    // Flatten %result0_2294 to [7,1600], multiply by %340 [1600,4800], add %341
    // [4800] (both defined outside this excerpt), then reshape to [1,7,4800].
    // %3047, %int0_2299 and %3052 are not referenced within this excerpt.
    // NOTE(review): presumably the fused query/key/value projection — confirm.
    %3047 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2297 = torch.constant.int 7
    %int1600_2298 = torch.constant.int 1600
    %3048 = torch.prim.ListConstruct %int7_2297, %int1600_2298 : (!torch.int, !torch.int) -> !torch.list<int>
    %3049 = torch.aten.reshape %result0_2294, %3048 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2299 = torch.constant.int 0
    %int1_2300 = torch.constant.int 1
    %3050 = torch.aten.mm %3049, %340 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3051 = torch.aten.add.Tensor %3050, %341, %int1_2300 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3052 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2301 = torch.constant.int 1
    %int7_2302 = torch.constant.int 7
    %int4800_2303 = torch.constant.int 4800
    %3053 = torch.prim.ListConstruct %int1_2301, %int7_2302, %int4800_2303 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3054 = torch.aten.reshape %3051, %3053 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat literal (three entries of 1600) converted to
    // a list; the split runs along dim 2 and is unpacked into %3058#0..#2,
    // each [1,7,1600].
    %3055 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3056 = torch.prim.tolist(%3055) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2304 = torch.constant.int 2
    %3057 = torch.aten.split_with_sizes %3054, %3056, %int2_2304 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3058:3 = torch.prim.ListUnpack %3057 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Head split of %3058#0: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3059 is not referenced within this excerpt.
    // %3062 is the left operand of the score matmul %3073 further down.
    // NOTE(review): presumably the attention query — confirm.
    %3059 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2305 = torch.constant.int 1
    %int7_2306 = torch.constant.int 7
    %int25_2307 = torch.constant.int 25
    %int64_2308 = torch.constant.int 64
    %3060 = torch.prim.ListConstruct %int1_2305, %int7_2306, %int25_2307, %int64_2308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3061 = torch.aten.reshape %3058#0, %3060 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2309 = torch.constant.int 1
    %int2_2310 = torch.constant.int 2
    %3062 = torch.aten.transpose.int %3061, %int1_2309, %int2_2310 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Head split of %3058#1: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3063 is not referenced within this excerpt.
    // %3066 is not consumed within this excerpt (the score matmul uses the
    // duplicate transpose %3071 of %3065).
    // NOTE(review): presumably %3066 is one of the [1,25,7,64] function results
    // (cached key) — confirm at the function's return.
    %3063 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2311 = torch.constant.int 1
    %int7_2312 = torch.constant.int 7
    %int25_2313 = torch.constant.int 25
    %int64_2314 = torch.constant.int 64
    %3064 = torch.prim.ListConstruct %int1_2311, %int7_2312, %int25_2313, %int64_2314 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3065 = torch.aten.reshape %3058#1, %3064 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2315 = torch.constant.int 1
    %int2_2316 = torch.constant.int 2
    %3066 = torch.aten.transpose.int %3065, %int1_2315, %int2_2316 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Head split of %3058#2: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3067 is not referenced within this excerpt.
    // %3070 is the right operand of the matmul %3088 that follows the softmax.
    // NOTE(review): presumably the attention value tensor — confirm.
    %3067 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2317 = torch.constant.int 1
    %int7_2318 = torch.constant.int 7
    %int25_2319 = torch.constant.int 25
    %int64_2320 = torch.constant.int 64
    %3068 = torch.prim.ListConstruct %int1_2317, %int7_2318, %int25_2319, %int64_2320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3069 = torch.aten.reshape %3058#2, %3068 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2321 = torch.constant.int 1
    %int2_2322 = torch.constant.int 2
    %3070 = torch.aten.transpose.int %3069, %int1_2321, %int2_2322 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matmul. %3071 repeats the dims-1/2 transpose of %3065 (same operands
    // as %3066); %3072 then swaps dims 2 and 3 to give [1,25,64,7], so that
    // %3062 [1,25,7,64] x %3072 [1,25,64,7] -> %3073 [1,25,7,7].
    %int1_2323 = torch.constant.int 1
    %int2_2324 = torch.constant.int 2
    %3071 = torch.aten.transpose.int %3065, %int1_2323, %int2_2324 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2325 = torch.constant.int 2
    %int3_2326 = torch.constant.int 3
    %3072 = torch.aten.transpose.int %3071, %int2_2325, %int3_2326 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %3073 = torch.aten.matmul %3062, %3072 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64.0 raised to the power 0.5 (64 is the size of the last dim
    // of the per-head tensors above).
    %3074 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3075 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3076 = torch.aten.pow.Tensor_Tensor %3074, %3075 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %3077 and %int0_2327 are not referenced within this excerpt.
    %3077 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2327 = torch.constant.int 0
    // A 0-d tensor filled with 0.0 is added to the scale factor (third operand 1),
    // which leaves its value unchanged; the scores are then divided by the result.
    %3078 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2328 = torch.constant.none
    %float0.000000e00_2329 = torch.constant.float 0.000000e+00
    %3079 = torch.aten.full %3078, %float0.000000e00_2329, %none_2328, %none_2328, %none_2328, %none_2328 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2330 = torch.constant.int 1
    %3080 = torch.aten.add.Tensor %3079, %3076, %int1_2330 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3081 = torch.aten.div.Tensor %3073, %3080 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Masking and softmax over the scaled scores %3081.
    // %3082 is a [1,1,7,7] boolean mask whose contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — confirm
    // against the unelided file.
    %3082 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // This to.dtype takes f32 in and yields f32 out, so the element type is unchanged.
    %int6_2331 = torch.constant.int 6
    %none_2332 = torch.constant.none
    %false_2333 = torch.constant.bool false
    %3083 = torch.aten.to.dtype %3081, %int6_2331, %false_2333, %false_2333, %none_2332 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise use -3.40282347E+38
    // (the most negative finite f32); the mask broadcasts over dim 1.
    %3084 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3085 = torch.aten.where.self %3082, %3083, %3084 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last dim), followed by another f32 -> f32 to.dtype.
    %int3_2334 = torch.constant.int 3
    %none_2335 = torch.constant.none
    %3086 = torch.aten.softmax.int %3085, %int3_2334, %none_2335 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2336 = torch.constant.int 6
    %none_2337 = torch.constant.none
    %false_2338 = torch.constant.bool false
    %3087 = torch.aten.to.dtype %3086, %int6_2336, %false_2338, %false_2338, %none_2337 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum and head merge: %3087 [1,25,7,7] x %3070 [1,25,7,64] ->
    // [1,25,7,64]; swap dims 1 and 2 back to [1,7,25,64]; reshape to [1,7,1600].
    // The shape literal %3090 is not referenced within this excerpt; the reshape
    // uses the ListConstruct %3091.
    %3088 = torch.aten.matmul %3087, %3070 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2339 = torch.constant.int 1
    %int2_2340 = torch.constant.int 2
    %3089 = torch.aten.transpose.int %3088, %int1_2339, %int2_2340 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3090 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2341 = torch.constant.int 1
    %int7_2342 = torch.constant.int 7
    %int1600_2343 = torch.constant.int 1600
    %3091 = torch.prim.ListConstruct %int1_2341, %int7_2342, %int1600_2343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3092 = torch.aten.reshape %3089, %3091 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection followed by an elementwise add.
    // Flatten %3092 to [7,1600], multiply by %342 [1600,1600], add %343 [1600]
    // (both defined outside this excerpt), then reshape back to [1,7,1600].
    // %3093 (literal [-1, 1600]), %int0_2346 and %3098 are not referenced within
    // this excerpt; the reshapes use explicit sizes from the ListConstructs.
    %3093 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2344 = torch.constant.int 7
    %int1600_2345 = torch.constant.int 1600
    %3094 = torch.prim.ListConstruct %int7_2344, %int1600_2345 : (!torch.int, !torch.int) -> !torch.list<int>
    %3095 = torch.aten.reshape %3092, %3094 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2346 = torch.constant.int 0
    %int1_2347 = torch.constant.int 1
    %3096 = torch.aten.mm %3095, %342 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3097 = torch.aten.add.Tensor %3096, %343, %int1_2347 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3098 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2348 = torch.constant.int 1
    %int7_2349 = torch.constant.int 7
    %int1600_2350 = torch.constant.int 1600
    %3099 = torch.prim.ListConstruct %int1_2348, %int7_2349, %int1600_2350 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3100 = torch.aten.reshape %3097, %3099 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add %3045, the tensor that was fed into the layer norm preceding this
    // attention computation.
    // NOTE(review): presumably the residual connection around attention — confirm.
    %int1_2351 = torch.constant.int 1
    %3101 = torch.aten.add.Tensor %3100, %3045, %int1_2351 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3101 over the last dim (size 1600) with operands %344 and
    // %345 ([1600] each, defined outside this excerpt) and eps ~1e-5.
    // Only %result0_2354 is used below; the two [1,7,1] results are not
    // referenced within this excerpt.
    %float9.999990e-06_2352 = torch.constant.float 9.9999997473787516E-6
    %int1600_2353 = torch.constant.int 1600
    %3102 = torch.prim.ListConstruct %int1600_2353 : (!torch.int) -> !torch.list<int>
    %result0_2354, %result1_2355, %result2_2356 = torch.aten.native_layer_norm %3101, %3102, %344, %345, %float9.999990e-06_2352 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear 1600 -> 6400: flatten %result0_2354 to [7,1600], multiply by %346
    // [1600,6400], add %347 [6400] (both defined outside this excerpt), then
    // reshape to [1,7,6400].
    // %3103, %int0_2359 and %3108 are not referenced within this excerpt.
    %3103 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2357 = torch.constant.int 7
    %int1600_2358 = torch.constant.int 1600
    %3104 = torch.prim.ListConstruct %int7_2357, %int1600_2358 : (!torch.int, !torch.int) -> !torch.list<int>
    %3105 = torch.aten.reshape %result0_2354, %3104 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2359 = torch.constant.int 0
    %int1_2360 = torch.constant.int 1
    %3106 = torch.aten.mm %3105, %346 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3107 = torch.aten.add.Tensor %3106, %347, %int1_2360 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3108 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2361 = torch.constant.int 1
    %int7_2362 = torch.constant.int 7
    %int6400_2363 = torch.constant.int 6400
    %3109 = torch.prim.ListConstruct %int1_2361, %int7_2362, %int6400_2363 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3110 = torch.aten.reshape %3107, %3109 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %3110, spelled out op by op:
    //   %3123 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh form of the GELU approximation.
    %3111 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3112 = torch.aten.mul.Tensor %3110, %3111 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3113 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3114 = torch.aten.pow.Tensor_Tensor %3110, %3113 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3115 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3116 = torch.aten.mul.Tensor %3114, %3115 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2364 = torch.constant.int 1
    %3117 = torch.aten.add.Tensor %3110, %3116, %int1_2364 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3118 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3119 = torch.aten.mul.Tensor %3117, %3118 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3120 = torch.aten.tanh %3119 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3121 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2365 = torch.constant.int 1
    %3122 = torch.aten.add.Tensor %3120, %3121, %int1_2365 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3123 = torch.aten.mul.Tensor %3112, %3122 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear 6400 -> 1600 followed by an elementwise add.
    // Flatten %3123 to [7,6400], multiply by %348 [6400,1600], add %349 [1600]
    // (both defined outside this excerpt), then reshape to [1,7,1600].
    // %3124, %int0_2368 and %3129 are not referenced within this excerpt.
    %3124 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2366 = torch.constant.int 7
    %int6400_2367 = torch.constant.int 6400
    %3125 = torch.prim.ListConstruct %int7_2366, %int6400_2367 : (!torch.int, !torch.int) -> !torch.list<int>
    %3126 = torch.aten.reshape %3123, %3125 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2368 = torch.constant.int 0
    %int1_2369 = torch.constant.int 1
    %3127 = torch.aten.mm %3126, %348 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3128 = torch.aten.add.Tensor %3127, %349, %int1_2369 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3129 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2370 = torch.constant.int 1
    %int7_2371 = torch.constant.int 7
    %int1600_2372 = torch.constant.int 1600
    %3130 = torch.prim.ListConstruct %int1_2370, %int7_2371, %int1600_2372 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3131 = torch.aten.reshape %3128, %3130 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Add %3101, the tensor that was fed into the preceding layer norm.
    // NOTE(review): presumably the residual connection around the MLP — confirm.
    %int1_2373 = torch.constant.int 1
    %3132 = torch.aten.add.Tensor %3101, %3131, %int1_2373 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3132 over the last dim (size 1600) with operands %350 and
    // %351 ([1600] each, defined outside this excerpt) and eps ~1e-5.
    // Only %result0_2376 is used below; the two [1,7,1] results are not
    // referenced within this excerpt.
    %float9.999990e-06_2374 = torch.constant.float 9.9999997473787516E-6
    %int1600_2375 = torch.constant.int 1600
    %3133 = torch.prim.ListConstruct %int1600_2375 : (!torch.int) -> !torch.list<int>
    %result0_2376, %result1_2377, %result2_2378 = torch.aten.native_layer_norm %3132, %3133, %350, %351, %float9.999990e-06_2374 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear 1600 -> 4800 and three-way split.
    // Flatten %result0_2376 to [7,1600], multiply by %352 [1600,4800], add %353
    // [4800] (both defined outside this excerpt), then reshape to [1,7,4800].
    // %3134, %int0_2381 and %3139 are not referenced within this excerpt.
    // NOTE(review): presumably the fused query/key/value projection — confirm.
    %3134 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2379 = torch.constant.int 7
    %int1600_2380 = torch.constant.int 1600
    %3135 = torch.prim.ListConstruct %int7_2379, %int1600_2380 : (!torch.int, !torch.int) -> !torch.list<int>
    %3136 = torch.aten.reshape %result0_2376, %3135 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2381 = torch.constant.int 0
    %int1_2382 = torch.constant.int 1
    %3137 = torch.aten.mm %3136, %352 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3138 = torch.aten.add.Tensor %3137, %353, %int1_2382 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3139 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2383 = torch.constant.int 1
    %int7_2384 = torch.constant.int 7
    %int4800_2385 = torch.constant.int 4800
    %3140 = torch.prim.ListConstruct %int1_2383, %int7_2384, %int4800_2385 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3141 = torch.aten.reshape %3138, %3140 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat literal (three entries of 1600) converted to
    // a list; the split runs along dim 2 and is unpacked into %3145#0..#2,
    // each [1,7,1600].
    %3142 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3143 = torch.prim.tolist(%3142) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2386 = torch.constant.int 2
    %3144 = torch.aten.split_with_sizes %3141, %3143, %int2_2386 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3145:3 = torch.prim.ListUnpack %3144 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Head split of %3145#0: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3146 is not referenced within this excerpt.
    // %3149 is the left operand of the score matmul %3160 further down.
    // NOTE(review): presumably the attention query — confirm.
    %3146 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2387 = torch.constant.int 1
    %int7_2388 = torch.constant.int 7
    %int25_2389 = torch.constant.int 25
    %int64_2390 = torch.constant.int 64
    %3147 = torch.prim.ListConstruct %int1_2387, %int7_2388, %int25_2389, %int64_2390 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3148 = torch.aten.reshape %3145#0, %3147 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2391 = torch.constant.int 1
    %int2_2392 = torch.constant.int 2
    %3149 = torch.aten.transpose.int %3148, %int1_2391, %int2_2392 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Head split of %3145#1: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3150 is not referenced within this excerpt.
    // %3153 is not consumed within this excerpt (the score matmul uses the
    // duplicate transpose %3158 of %3152).
    // NOTE(review): presumably %3153 is one of the [1,25,7,64] function results
    // (cached key) — confirm at the function's return.
    %3150 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2393 = torch.constant.int 1
    %int7_2394 = torch.constant.int 7
    %int25_2395 = torch.constant.int 25
    %int64_2396 = torch.constant.int 64
    %3151 = torch.prim.ListConstruct %int1_2393, %int7_2394, %int25_2395, %int64_2396 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3152 = torch.aten.reshape %3145#1, %3151 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2397 = torch.constant.int 1
    %int2_2398 = torch.constant.int 2
    %3153 = torch.aten.transpose.int %3152, %int1_2397, %int2_2398 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Head split of %3145#2: reshape [1,7,1600] -> [1,7,25,64], then swap dims
    // 1 and 2 to obtain [1,25,7,64].
    // The shape literal %3154 is not referenced within this excerpt.
    // %3157 is the right operand of the matmul %3175 that follows the softmax.
    // NOTE(review): presumably the attention value tensor — confirm.
    %3154 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2399 = torch.constant.int 1
    %int7_2400 = torch.constant.int 7
    %int25_2401 = torch.constant.int 25
    %int64_2402 = torch.constant.int 64
    %3155 = torch.prim.ListConstruct %int1_2399, %int7_2400, %int25_2401, %int64_2402 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3156 = torch.aten.reshape %3145#2, %3155 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2403 = torch.constant.int 1
    %int2_2404 = torch.constant.int 2
    %3157 = torch.aten.transpose.int %3156, %int1_2403, %int2_2404 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matmul. %3158 repeats the dims-1/2 transpose of %3152 (same operands
    // as %3153); %3159 then swaps dims 2 and 3 to give [1,25,64,7], so that
    // %3149 [1,25,7,64] x %3159 [1,25,64,7] -> %3160 [1,25,7,7].
    %int1_2405 = torch.constant.int 1
    %int2_2406 = torch.constant.int 2
    %3158 = torch.aten.transpose.int %3152, %int1_2405, %int2_2406 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2407 = torch.constant.int 2
    %int3_2408 = torch.constant.int 3
    %3159 = torch.aten.transpose.int %3158, %int2_2407, %int3_2408 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %3160 = torch.aten.matmul %3149, %3159 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64.0 raised to the power 0.5 (64 is the size of the last dim
    // of the per-head tensors above).
    %3161 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3162 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3163 = torch.aten.pow.Tensor_Tensor %3161, %3162 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %3164 and %int0_2409 are not referenced within this excerpt.
    %3164 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2409 = torch.constant.int 0
    // A 0-d tensor filled with 0.0 is added to the scale factor (third operand 1),
    // which leaves its value unchanged; the scores are then divided by the result.
    %3165 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2410 = torch.constant.none
    %float0.000000e00_2411 = torch.constant.float 0.000000e+00
    %3166 = torch.aten.full %3165, %float0.000000e00_2411, %none_2410, %none_2410, %none_2410, %none_2410 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2412 = torch.constant.int 1
    %3167 = torch.aten.add.Tensor %3166, %3163, %int1_2412 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3168 = torch.aten.div.Tensor %3160, %3167 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask. Its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask - confirm
    // against the un-elided model.
    %3169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32, so the
    // element type is unchanged by this op.
    %int6_2413 = torch.constant.int 6
    %none_2414 = torch.constant.none
    %false_2415 = torch.constant.bool false
    %3170 = torch.aten.to.dtype %3168, %int6_2413, %false_2415, %false_2415, %none_2414 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32).
    %3171 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3172 = torch.aten.where.self %3169, %3170, %3171 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis), dtype argument left as none.
    %int3_2416 = torch.constant.int 3
    %none_2417 = torch.constant.none
    %3173 = torch.aten.softmax.int %3172, %int3_2416, %none_2417 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype (dtype code 6); types are again unchanged.
    %int6_2418 = torch.constant.int 6
    %none_2419 = torch.constant.none
    %false_2420 = torch.constant.bool false
    %3174 = torch.aten.to.dtype %3173, %int6_2418, %false_2420, %false_2420, %none_2419 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] probabilities x [1,25,7,64] (%3157) -> [1,25,7,64].
    %3175 = torch.aten.matmul %3174, %3157 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back to [1,7,25,64].
    %int1_2421 = torch.constant.int 1
    %int2_2422 = torch.constant.int 2
    %3176 = torch.aten.transpose.int %3175, %int1_2421, %int2_2422 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %3177 holds the target shape as a tensor but is not referenced in the
    // visible lines; the reshape below uses the int list %3178 instead.
    %3177 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2423 = torch.constant.int 1
    %int7_2424 = torch.constant.int 7
    %int1600_2425 = torch.constant.int 1600
    %3178 = torch.prim.ListConstruct %int1_2423, %int7_2424, %int1600_2425 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing 25 x 64 dims into 1600.
    %3179 = torch.aten.reshape %3176, %3178 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 followed by a residual add.
    // %3180 and %int0_2428 are not referenced in the visible lines.
    %3180 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2426 = torch.constant.int 7
    %int1600_2427 = torch.constant.int 1600
    %3181 = torch.prim.ListConstruct %int7_2426, %int1600_2427 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to 2-D [7,1600] so a plain mm can be used.
    %3182 = torch.aten.reshape %3179, %3181 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2428 = torch.constant.int 0
    %int1_2429 = torch.constant.int 1
    // %354 ([1600,1600]) and %355 ([1600]) are defined above this excerpt;
    // here they act as the weight matrix and the additive bias (alpha = 1).
    %3183 = torch.aten.mm %3182, %354 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3184 = torch.aten.add.Tensor %3183, %355, %int1_2429 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %3185 is a tensor copy of the target shape, unused in the visible lines.
    %3185 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2430 = torch.constant.int 1
    %int7_2431 = torch.constant.int 7
    %int1600_2432 = torch.constant.int 1600
    %3186 = torch.prim.ListConstruct %int1_2430, %int7_2431, %int1600_2432 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dim: [7,1600] -> [1,7,1600].
    %3187 = torch.aten.reshape %3184, %3186 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual: add %3132 (defined above this excerpt) with alpha = 1.
    %int1_2433 = torch.constant.int 1
    %3188 = torch.aten.add.Tensor %3187, %3132, %int1_2433 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3188 over the last dim (normalized_shape = [1600]) with
    // weight %356, bias %357 and eps 9.9999997473787516E-6 (1e-5 rounded to f32).
    // Only %result0_2436 is used in the visible lines; the two [1,7,1] results
    // are the op's extra statistics outputs.
    %float9.999990e-06_2434 = torch.constant.float 9.9999997473787516E-6
    %int1600_2435 = torch.constant.int 1600
    %3189 = torch.prim.ListConstruct %int1600_2435 : (!torch.int) -> !torch.list<int>
    %result0_2436, %result1_2437, %result2_2438 = torch.aten.native_layer_norm %3188, %3189, %356, %357, %float9.999990e-06_2434 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 applied to the normalized activations.
    // %3190 and %int0_2441 are not referenced in the visible lines.
    %3190 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2439 = torch.constant.int 7
    %int1600_2440 = torch.constant.int 1600
    %3191 = torch.prim.ListConstruct %int7_2439, %int1600_2440 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %3192 = torch.aten.reshape %result0_2436, %3191 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2441 = torch.constant.int 0
    %int1_2442 = torch.constant.int 1
    // %358 ([1600,6400]) and %359 ([6400]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3193 = torch.aten.mm %3192, %358 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3194 = torch.aten.add.Tensor %3193, %359, %int1_2442 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %3195 duplicates the target shape as a tensor; unused in the visible lines.
    %3195 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2443 = torch.constant.int 1
    %int7_2444 = torch.constant.int 7
    %int6400_2445 = torch.constant.int 6400
    %3196 = torch.prim.ListConstruct %int1_2443, %int7_2444, %int6400_2445 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to 3-D: [7,6400] -> [1,7,6400].
    %3197 = torch.aten.reshape %3194, %3196 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %3197, computed as
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh-based GELU approximation (0.797884583 ~ sqrt(2/pi)).
    // 0.5 * x
    %3198 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3199 = torch.aten.mul.Tensor %3197, %3198 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %3200 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3201 = torch.aten.pow.Tensor_Tensor %3197, %3200 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %3202 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3203 = torch.aten.mul.Tensor %3201, %3202 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_2446 = torch.constant.int 1
    %3204 = torch.aten.add.Tensor %3197, %3203, %int1_2446 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %3205 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3206 = torch.aten.mul.Tensor %3204, %3205 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3207 = torch.aten.tanh %3206 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...)
    %3208 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2447 = torch.constant.int 1
    %3209 = torch.aten.add.Tensor %3207, %3208, %int1_2447 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %3210 = torch.aten.mul.Tensor %3199, %3209 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 followed by a residual add.
    // %3211 and %int0_2450 are not referenced in the visible lines.
    %3211 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2448 = torch.constant.int 7
    %int6400_2449 = torch.constant.int 6400
    %3212 = torch.prim.ListConstruct %int7_2448, %int6400_2449 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,6400] for the 2-D mm.
    %3213 = torch.aten.reshape %3210, %3212 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2450 = torch.constant.int 0
    %int1_2451 = torch.constant.int 1
    // %360 ([6400,1600]) and %361 ([1600]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3214 = torch.aten.mm %3213, %360 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3215 = torch.aten.add.Tensor %3214, %361, %int1_2451 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %3216 duplicates the target shape as a tensor; unused in the visible lines.
    %3216 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2452 = torch.constant.int 1
    %int7_2453 = torch.constant.int 7
    %int1600_2454 = torch.constant.int 1600
    %3217 = torch.prim.ListConstruct %int1_2452, %int7_2453, %int1600_2454 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to 3-D: [7,1600] -> [1,7,1600].
    %3218 = torch.aten.reshape %3215, %3217 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual: %3188 (the input to the preceding layer norm) + %3218, alpha = 1.
    %int1_2455 = torch.constant.int 1
    %3219 = torch.aten.add.Tensor %3188, %3218, %int1_2455 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3219 over the last dim (normalized_shape = [1600]) with
    // weight %362, bias %363 and eps 9.9999997473787516E-6 (1e-5 rounded to f32).
    // Only %result0_2458 is used in the visible lines.
    %float9.999990e-06_2456 = torch.constant.float 9.9999997473787516E-6
    %int1600_2457 = torch.constant.int 1600
    %3220 = torch.prim.ListConstruct %int1600_2457 : (!torch.int) -> !torch.list<int>
    %result0_2458, %result1_2459, %result2_2460 = torch.aten.native_layer_norm %3219, %3220, %362, %363, %float9.999990e-06_2456 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused linear projection 1600 -> 4800, then a 3-way split into 1600-wide
    // slices. In the ops that follow, #0 is the left operand of the score
    // matmul, #1 is transposed into its right operand, and #2 is multiplied by
    // the softmax output (i.e. query / key / value roles).
    // %3221 and %int0_2463 are not referenced in the visible lines.
    %3221 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2461 = torch.constant.int 7
    %int1600_2462 = torch.constant.int 1600
    %3222 = torch.prim.ListConstruct %int7_2461, %int1600_2462 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %3223 = torch.aten.reshape %result0_2458, %3222 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2463 = torch.constant.int 0
    %int1_2464 = torch.constant.int 1
    // %364 ([1600,4800]) and %365 ([4800]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3224 = torch.aten.mm %3223, %364 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3225 = torch.aten.add.Tensor %3224, %365, %int1_2464 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // %3226 duplicates the target shape as a tensor; unused in the visible lines.
    %3226 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2465 = torch.constant.int 1
    %int7_2466 = torch.constant.int 7
    %int4800_2467 = torch.constant.int 4800
    %3227 = torch.prim.ListConstruct %int1_2465, %int7_2466, %int4800_2467 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3228 = torch.aten.reshape %3225, %3227 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes, each 1600, converted to an int list.
    %3229 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3230 = torch.prim.tolist(%3229) : !torch.vtensor<[3],si64> -> !torch.list<int>
    // Split along dim 2 and unpack the list into three [1,7,1600] values.
    %int2_2468 = torch.constant.int 2
    %3231 = torch.aten.split_with_sizes %3228, %3230, %int2_2468 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3232:3 = torch.prim.ListUnpack %3231 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Apply the same two steps to each of the three split results:
    // reshape [1,7,1600] -> [1,7,25,64], then swap dims 1 and 2 -> [1,25,7,64].
    // NOTE(review): presumably [batch, heads, seq, head_dim] - confirm.
    // The shape-tensor literals %3233, %3237 and %3241 are not referenced in
    // the visible lines; the reshapes take the int lists instead.
    // Slice #0.
    %3233 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2469 = torch.constant.int 1
    %int7_2470 = torch.constant.int 7
    %int25_2471 = torch.constant.int 25
    %int64_2472 = torch.constant.int 64
    %3234 = torch.prim.ListConstruct %int1_2469, %int7_2470, %int25_2471, %int64_2472 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3235 = torch.aten.reshape %3232#0, %3234 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2473 = torch.constant.int 1
    %int2_2474 = torch.constant.int 2
    %3236 = torch.aten.transpose.int %3235, %int1_2473, %int2_2474 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1. %3240 is not used again in the visible lines.
    // NOTE(review): its shape matches the function's [1,25,7,64] results, so it
    // may be returned as a cached tensor - verify at the func return.
    %3237 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2475 = torch.constant.int 1
    %int7_2476 = torch.constant.int 7
    %int25_2477 = torch.constant.int 25
    %int64_2478 = torch.constant.int 64
    %3238 = torch.prim.ListConstruct %int1_2475, %int7_2476, %int25_2477, %int64_2478 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3239 = torch.aten.reshape %3232#1, %3238 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2479 = torch.constant.int 1
    %int2_2480 = torch.constant.int 2
    %3240 = torch.aten.transpose.int %3239, %int1_2479, %int2_2480 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2.
    %3241 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2481 = torch.constant.int 1
    %int7_2482 = torch.constant.int 7
    %int25_2483 = torch.constant.int 25
    %int64_2484 = torch.constant.int 64
    %3242 = torch.prim.ListConstruct %int1_2481, %int7_2482, %int25_2483, %int64_2484 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3243 = torch.aten.reshape %3232#2, %3242 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2485 = torch.constant.int 1
    %int2_2486 = torch.constant.int 2
    %3244 = torch.aten.transpose.int %3243, %int1_2485, %int2_2486 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matmul. %3245 repeats the dims 1<->2 transpose of %3239 that %3240
    // already computed; a further dims 2<->3 transpose yields [1,25,64,7].
    %int1_2487 = torch.constant.int 1
    %int2_2488 = torch.constant.int 2
    %3245 = torch.aten.transpose.int %3239, %int1_2487, %int2_2488 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2489 = torch.constant.int 2
    %int3_2490 = torch.constant.int 3
    %3246 = torch.aten.transpose.int %3245, %int2_2489, %int3_2490 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3247 = torch.aten.matmul %3236, %3246 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Score scaling. %3250 = 64.0 ^ 0.5 as a rank-0 tensor.
    %3248 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3249 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3250 = torch.aten.pow.Tensor_Tensor %3248, %3249 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %3251 and %int0_2491 are not referenced anywhere in the visible lines.
    %3251 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2491 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0 (empty size list, all optional args none).
    %3252 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2492 = torch.constant.none
    %float0.000000e00_2493 = torch.constant.float 0.000000e+00
    %3253 = torch.aten.full %3252, %float0.000000e00_2493, %none_2492, %none_2492, %none_2492, %none_2492 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    // 0.0 + 1 * %3250, i.e. the divisor carries the value of 64^0.5.
    %int1_2494 = torch.constant.int 1
    %3254 = torch.aten.add.Tensor %3253, %3250, %int1_2494 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Divide every score by that scalar (broadcast over [1,25,7,7]).
    %3255 = torch.aten.div.Tensor %3247, %3254 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask. Its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask - confirm
    // against the un-elided model.
    %3256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32, so the
    // element type is unchanged by this op.
    %int6_2495 = torch.constant.int 6
    %none_2496 = torch.constant.none
    %false_2497 = torch.constant.bool false
    %3257 = torch.aten.to.dtype %3255, %int6_2495, %false_2497, %false_2497, %none_2496 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32).
    %3258 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3259 = torch.aten.where.self %3256, %3257, %3258 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis), dtype argument left as none.
    %int3_2498 = torch.constant.int 3
    %none_2499 = torch.constant.none
    %3260 = torch.aten.softmax.int %3259, %int3_2498, %none_2499 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype (dtype code 6); types are again unchanged.
    %int6_2500 = torch.constant.int 6
    %none_2501 = torch.constant.none
    %false_2502 = torch.constant.bool false
    %3261 = torch.aten.to.dtype %3260, %int6_2500, %false_2502, %false_2502, %none_2501 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] probabilities x [1,25,7,64] (%3244) -> [1,25,7,64].
    %3262 = torch.aten.matmul %3261, %3244 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back to [1,7,25,64].
    %int1_2503 = torch.constant.int 1
    %int2_2504 = torch.constant.int 2
    %3263 = torch.aten.transpose.int %3262, %int1_2503, %int2_2504 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %3264 holds the target shape as a tensor but is not referenced in the
    // visible lines; the reshape below uses the int list %3265 instead.
    %3264 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2505 = torch.constant.int 1
    %int7_2506 = torch.constant.int 7
    %int1600_2507 = torch.constant.int 1600
    %3265 = torch.prim.ListConstruct %int1_2505, %int7_2506, %int1600_2507 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing 25 x 64 dims into 1600.
    %3266 = torch.aten.reshape %3263, %3265 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 followed by a residual add.
    // %3267 and %int0_2510 are not referenced in the visible lines.
    %3267 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2508 = torch.constant.int 7
    %int1600_2509 = torch.constant.int 1600
    %3268 = torch.prim.ListConstruct %int7_2508, %int1600_2509 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to 2-D [7,1600] so a plain mm can be used.
    %3269 = torch.aten.reshape %3266, %3268 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2510 = torch.constant.int 0
    %int1_2511 = torch.constant.int 1
    // %366 ([1600,1600]) and %367 ([1600]) are defined above this excerpt;
    // here they act as the weight matrix and the additive bias (alpha = 1).
    %3270 = torch.aten.mm %3269, %366 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3271 = torch.aten.add.Tensor %3270, %367, %int1_2511 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %3272 is a tensor copy of the target shape, unused in the visible lines.
    %3272 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2512 = torch.constant.int 1
    %int7_2513 = torch.constant.int 7
    %int1600_2514 = torch.constant.int 1600
    %3273 = torch.prim.ListConstruct %int1_2512, %int7_2513, %int1600_2514 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dim: [7,1600] -> [1,7,1600].
    %3274 = torch.aten.reshape %3271, %3273 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual: add %3219 (the input to the layer norm that fed this
    // attention computation) with alpha = 1.
    %int1_2515 = torch.constant.int 1
    %3275 = torch.aten.add.Tensor %3274, %3219, %int1_2515 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3275 over the last dim (normalized_shape = [1600]) with
    // weight %368, bias %369 and eps 9.9999997473787516E-6 (1e-5 rounded to f32).
    // Only %result0_2518 is used in the visible lines.
    %float9.999990e-06_2516 = torch.constant.float 9.9999997473787516E-6
    %int1600_2517 = torch.constant.int 1600
    %3276 = torch.prim.ListConstruct %int1600_2517 : (!torch.int) -> !torch.list<int>
    %result0_2518, %result1_2519, %result2_2520 = torch.aten.native_layer_norm %3275, %3276, %368, %369, %float9.999990e-06_2516 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 applied to the normalized activations.
    // %3277 and %int0_2523 are not referenced in the visible lines.
    %3277 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2521 = torch.constant.int 7
    %int1600_2522 = torch.constant.int 1600
    %3278 = torch.prim.ListConstruct %int7_2521, %int1600_2522 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %3279 = torch.aten.reshape %result0_2518, %3278 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2523 = torch.constant.int 0
    %int1_2524 = torch.constant.int 1
    // %370 ([1600,6400]) and %371 ([6400]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3280 = torch.aten.mm %3279, %370 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3281 = torch.aten.add.Tensor %3280, %371, %int1_2524 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %3282 duplicates the target shape as a tensor; unused in the visible lines.
    %3282 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2525 = torch.constant.int 1
    %int7_2526 = torch.constant.int 7
    %int6400_2527 = torch.constant.int 6400
    %3283 = torch.prim.ListConstruct %int1_2525, %int7_2526, %int6400_2527 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to 3-D: [7,6400] -> [1,7,6400].
    %3284 = torch.aten.reshape %3281, %3283 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %3284, computed as
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // which is the tanh-based GELU approximation (0.797884583 ~ sqrt(2/pi)).
    // 0.5 * x
    %3285 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3286 = torch.aten.mul.Tensor %3284, %3285 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %3287 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3288 = torch.aten.pow.Tensor_Tensor %3284, %3287 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %3289 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3290 = torch.aten.mul.Tensor %3288, %3289 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_2528 = torch.constant.int 1
    %3291 = torch.aten.add.Tensor %3284, %3290, %int1_2528 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %3292 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3293 = torch.aten.mul.Tensor %3291, %3292 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3294 = torch.aten.tanh %3293 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...)
    %3295 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2529 = torch.constant.int 1
    %3296 = torch.aten.add.Tensor %3294, %3295, %int1_2529 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %3297 = torch.aten.mul.Tensor %3286, %3296 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 followed by a residual add.
    // %3298 and %int0_2532 are not referenced in the visible lines.
    %3298 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2530 = torch.constant.int 7
    %int6400_2531 = torch.constant.int 6400
    %3299 = torch.prim.ListConstruct %int7_2530, %int6400_2531 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,6400] for the 2-D mm.
    %3300 = torch.aten.reshape %3297, %3299 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2532 = torch.constant.int 0
    %int1_2533 = torch.constant.int 1
    // %372 ([6400,1600]) and %373 ([1600]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3301 = torch.aten.mm %3300, %372 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3302 = torch.aten.add.Tensor %3301, %373, %int1_2533 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %3303 duplicates the target shape as a tensor; unused in the visible lines.
    %3303 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2534 = torch.constant.int 1
    %int7_2535 = torch.constant.int 7
    %int1600_2536 = torch.constant.int 1600
    %3304 = torch.prim.ListConstruct %int1_2534, %int7_2535, %int1600_2536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to 3-D: [7,1600] -> [1,7,1600].
    %3305 = torch.aten.reshape %3302, %3304 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual: %3275 (the input to the preceding layer norm) + %3305, alpha = 1.
    %int1_2537 = torch.constant.int 1
    %3306 = torch.aten.add.Tensor %3275, %3305, %int1_2537 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3306 over the last dim (normalized_shape = [1600]) with
    // weight %374, bias %375 and eps 9.9999997473787516E-6 (1e-5 rounded to f32).
    // Only %result0_2540 is used in the visible lines.
    %float9.999990e-06_2538 = torch.constant.float 9.9999997473787516E-6
    %int1600_2539 = torch.constant.int 1600
    %3307 = torch.prim.ListConstruct %int1600_2539 : (!torch.int) -> !torch.list<int>
    %result0_2540, %result1_2541, %result2_2542 = torch.aten.native_layer_norm %3306, %3307, %374, %375, %float9.999990e-06_2538 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused linear projection 1600 -> 4800, then a 3-way split into 1600-wide
    // slices. In the ops that follow, #0 is the left operand of the score
    // matmul, #1 is transposed into its right operand, and #2 is multiplied by
    // the softmax output (i.e. query / key / value roles).
    // %3308 and %int0_2545 are not referenced in the visible lines.
    %3308 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2543 = torch.constant.int 7
    %int1600_2544 = torch.constant.int 1600
    %3309 = torch.prim.ListConstruct %int7_2543, %int1600_2544 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %3310 = torch.aten.reshape %result0_2540, %3309 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2545 = torch.constant.int 0
    %int1_2546 = torch.constant.int 1
    // %376 ([1600,4800]) and %377 ([4800]) come from above this excerpt and
    // serve as weight and bias (alpha = 1).
    %3311 = torch.aten.mm %3310, %376 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3312 = torch.aten.add.Tensor %3311, %377, %int1_2546 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // %3313 duplicates the target shape as a tensor; unused in the visible lines.
    %3313 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2547 = torch.constant.int 1
    %int7_2548 = torch.constant.int 7
    %int4800_2549 = torch.constant.int 4800
    %3314 = torch.prim.ListConstruct %int1_2547, %int7_2548, %int4800_2549 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3315 = torch.aten.reshape %3312, %3314 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes, each 1600, converted to an int list.
    %3316 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3317 = torch.prim.tolist(%3316) : !torch.vtensor<[3],si64> -> !torch.list<int>
    // Split along dim 2 and unpack the list into three [1,7,1600] values.
    %int2_2550 = torch.constant.int 2
    %3318 = torch.aten.split_with_sizes %3315, %3317, %int2_2550 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3319:3 = torch.prim.ListUnpack %3318 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Apply the same two steps to each of the three split results:
    // reshape [1,7,1600] -> [1,7,25,64], then swap dims 1 and 2 -> [1,25,7,64].
    // NOTE(review): presumably [batch, heads, seq, head_dim] - confirm.
    // The shape-tensor literals %3320, %3324 and %3328 are not referenced in
    // the visible lines; the reshapes take the int lists instead.
    // Slice #0.
    %3320 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2551 = torch.constant.int 1
    %int7_2552 = torch.constant.int 7
    %int25_2553 = torch.constant.int 25
    %int64_2554 = torch.constant.int 64
    %3321 = torch.prim.ListConstruct %int1_2551, %int7_2552, %int25_2553, %int64_2554 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3322 = torch.aten.reshape %3319#0, %3321 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2555 = torch.constant.int 1
    %int2_2556 = torch.constant.int 2
    %3323 = torch.aten.transpose.int %3322, %int1_2555, %int2_2556 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #1. %3327 is not used again in the visible lines.
    // NOTE(review): its shape matches the function's [1,25,7,64] results, so it
    // may be returned as a cached tensor - verify at the func return.
    %3324 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2557 = torch.constant.int 1
    %int7_2558 = torch.constant.int 7
    %int25_2559 = torch.constant.int 25
    %int64_2560 = torch.constant.int 64
    %3325 = torch.prim.ListConstruct %int1_2557, %int7_2558, %int25_2559, %int64_2560 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3326 = torch.aten.reshape %3319#1, %3325 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2561 = torch.constant.int 1
    %int2_2562 = torch.constant.int 2
    %3327 = torch.aten.transpose.int %3326, %int1_2561, %int2_2562 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Slice #2.
    %3328 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2563 = torch.constant.int 1
    %int7_2564 = torch.constant.int 7
    %int25_2565 = torch.constant.int 25
    %int64_2566 = torch.constant.int 64
    %3329 = torch.prim.ListConstruct %int1_2563, %int7_2564, %int25_2565, %int64_2566 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3330 = torch.aten.reshape %3319#2, %3329 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2567 = torch.constant.int 1
    %int2_2568 = torch.constant.int 2
    %3331 = torch.aten.transpose.int %3330, %int1_2567, %int2_2568 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matmul. %3332 repeats the dims 1<->2 transpose of %3326 that %3327
    // already computed; a further dims 2<->3 transpose yields [1,25,64,7].
    %int1_2569 = torch.constant.int 1
    %int2_2570 = torch.constant.int 2
    %3332 = torch.aten.transpose.int %3326, %int1_2569, %int2_2570 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2571 = torch.constant.int 2
    %int3_2572 = torch.constant.int 3
    %3333 = torch.aten.transpose.int %3332, %int2_2571, %int3_2572 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched matmul [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3334 = torch.aten.matmul %3323, %3333 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Score scaling. %3337 = 64.0 ^ 0.5 as a rank-0 tensor.
    %3335 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3336 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3337 = torch.aten.pow.Tensor_Tensor %3335, %3336 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %3338 and %int0_2573 are not referenced anywhere in the visible lines.
    %3338 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2573 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0 (empty size list, all optional args none).
    %3339 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2574 = torch.constant.none
    %float0.000000e00_2575 = torch.constant.float 0.000000e+00
    %3340 = torch.aten.full %3339, %float0.000000e00_2575, %none_2574, %none_2574, %none_2574, %none_2574 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    // 0.0 + 1 * %3337, i.e. the divisor carries the value of 64^0.5.
    %int1_2576 = torch.constant.int 1
    %3341 = torch.aten.add.Tensor %3340, %3337, %int1_2576 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Divide every score by that scalar (broadcast over [1,25,7,7]).
    %3342 = torch.aten.div.Tensor %3334, %3341 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask. Its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask - confirm
    // against the un-elided model.
    %3343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32, so the
    // element type is unchanged by this op.
    %int6_2577 = torch.constant.int 6
    %none_2578 = torch.constant.none
    %false_2579 = torch.constant.bool false
    %3344 = torch.aten.to.dtype %3342, %int6_2577, %false_2579, %false_2579, %none_2578 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38 (the most negative finite f32).
    %3345 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3346 = torch.aten.where.self %3343, %3344, %3345 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis), dtype argument left as none.
    %int3_2580 = torch.constant.int 3
    %none_2581 = torch.constant.none
    %3347 = torch.aten.softmax.int %3346, %int3_2580, %none_2581 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype (dtype code 6); types are again unchanged.
    %int6_2582 = torch.constant.int 6
    %none_2583 = torch.constant.none
    %false_2584 = torch.constant.bool false
    %3348 = torch.aten.to.dtype %3347, %int6_2582, %false_2584, %false_2584, %none_2583 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] probabilities x [1,25,7,64] (%3331) -> [1,25,7,64].
    %3349 = torch.aten.matmul %3348, %3331 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back to [1,7,25,64].
    %int1_2585 = torch.constant.int 1
    %int2_2586 = torch.constant.int 2
    %3350 = torch.aten.transpose.int %3349, %int1_2585, %int2_2586 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %3351 holds the target shape as a tensor but is not referenced in the
    // visible lines; the reshape below uses the int list %3352 instead.
    %3351 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2587 = torch.constant.int 1
    %int7_2588 = torch.constant.int 7
    %int1600_2589 = torch.constant.int 1600
    %3352 = torch.prim.ListConstruct %int1_2587, %int7_2588, %int1600_2589 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing 25 x 64 dims into 1600.
    %3353 = torch.aten.reshape %3350, %3352 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %3354 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2590 = torch.constant.int 7
    %int1600_2591 = torch.constant.int 1600
    %3355 = torch.prim.ListConstruct %int7_2590, %int1600_2591 : (!torch.int, !torch.int) -> !torch.list<int>
    %3356 = torch.aten.reshape %3353, %3355 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2592 = torch.constant.int 0
    %int1_2593 = torch.constant.int 1
    %3357 = torch.aten.mm %3356, %378 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3358 = torch.aten.add.Tensor %3357, %379, %int1_2593 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3359 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2594 = torch.constant.int 1
    %int7_2595 = torch.constant.int 7
    %int1600_2596 = torch.constant.int 1600
    %3360 = torch.prim.ListConstruct %int1_2594, %int7_2595, %int1600_2596 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3361 = torch.aten.reshape %3358, %3360 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2597 = torch.constant.int 1
    %3362 = torch.aten.add.Tensor %3361, %3306, %int1_2597 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %3362 over its last dim (normalized_shape [1600]) with
    // weight %380 and bias %381, then apply mm with %382 [1600,6400] plus bias
    // %383 to get [1,7,6400]. %380..%383 are defined earlier in this function;
    // %382/%383 are presumably the MLP up-projection parameters -- TODO confirm.
    // eps is 1e-5 rounded to f32 and printed at f64 precision.
    %float9.999990e-06_2598 = torch.constant.float 9.9999997473787516E-6
    %int1600_2599 = torch.constant.int 1600
    %3363 = torch.prim.ListConstruct %int1600_2599 : (!torch.int) -> !torch.list<int>
    // Only %result0_2600 is consumed in this section; the two [1,7,1] results
    // are the per-row statistics that native_layer_norm also returns.
    %result0_2600, %result1_2601, %result2_2602 = torch.aten.native_layer_norm %3362, %3363, %380, %381, %float9.999990e-06_2598 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal; not used by any op in this section.
    %3364 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2603 = torch.constant.int 7
    %int1600_2604 = torch.constant.int 1600
    %3365 = torch.prim.ListConstruct %int7_2603, %int1600_2604 : (!torch.int, !torch.int) -> !torch.list<int>
    %3366 = torch.aten.reshape %result0_2600, %3365 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2605 is not used by any op in this section.
    %int0_2605 = torch.constant.int 0
    %int1_2606 = torch.constant.int 1
    %3367 = torch.aten.mm %3366, %382 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    // Bias add with alpha = 1; the [6400] bias broadcasts over the 7 rows.
    %3368 = torch.aten.add.Tensor %3367, %383, %int1_2606 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal; not used by any op in this section.
    %3369 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2607 = torch.constant.int 1
    %int7_2608 = torch.constant.int 7
    %int6400_2609 = torch.constant.int 6400
    %3370 = torch.prim.ListConstruct %int1_2607, %int7_2608, %int6400_2609 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3371 = torch.aten.reshape %3368, %3370 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise tanh-approximated GELU of x = %3371:
    //   %3384 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // 0.797884583 is sqrt(2/pi) at f32 precision.
    %3372 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %3373 = torch.aten.mul.Tensor %3371, %3372 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3374 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %3375 = torch.aten.pow.Tensor_Tensor %3371, %3374 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3376 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %3377 = torch.aten.mul.Tensor %3375, %3376 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2610 = torch.constant.int 1
    // x + 0.044715 * x^3 (alpha = 1)
    %3378 = torch.aten.add.Tensor %3371, %3377, %int1_2610 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3379 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3380 = torch.aten.mul.Tensor %3378, %3379 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3381 = torch.aten.tanh %3380 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3382 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2611 = torch.constant.int 1
    // 1 + tanh(...)
    %3383 = torch.aten.add.Tensor %3381, %3382, %int1_2611 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Final product of the two halves of the formula.
    %3384 = torch.aten.mul.Tensor %3373, %3383 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Flatten %3384 to [7,6400], apply mm with %384 [6400,1600] plus bias %385,
    // restore [1,7,1600], and add %3362 (the tensor that was layer-normalized
    // before this MLP). %384/%385 are defined earlier in this function;
    // presumably the MLP down-projection parameters -- TODO confirm.
    // Shape literal; not used by any op in this section.
    %3385 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2612 = torch.constant.int 7
    %int6400_2613 = torch.constant.int 6400
    %3386 = torch.prim.ListConstruct %int7_2612, %int6400_2613 : (!torch.int, !torch.int) -> !torch.list<int>
    %3387 = torch.aten.reshape %3384, %3386 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_2614 is not used by any op in this section.
    %int0_2614 = torch.constant.int 0
    %int1_2615 = torch.constant.int 1
    %3388 = torch.aten.mm %3387, %384 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    // Bias add with alpha = 1; the [1600] bias broadcasts over the 7 rows.
    %3389 = torch.aten.add.Tensor %3388, %385, %int1_2615 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal; not used by any op in this section.
    %3390 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2616 = torch.constant.int 1
    %int7_2617 = torch.constant.int 7
    %int1600_2618 = torch.constant.int 1600
    %3391 = torch.prim.ListConstruct %int1_2616, %int7_2617, %int1600_2618 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3392 = torch.aten.reshape %3389, %3391 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2619 = torch.constant.int 1
    // Residual sum: %3362 + %3392 (alpha = 1).
    %3393 = torch.aten.add.Tensor %3362, %3392, %int1_2619 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %3393 over its last dim with weight %386 and bias %387,
    // apply mm with %388 [1600,4800] plus bias %389, then split the 4800-wide
    // result into three [1,7,1600] tensors along dim 2. %386..%389 are defined
    // earlier in this function. From their later uses, %3406#0/#1/#2 act as
    // query/key/value respectively.
    // eps is 1e-5 rounded to f32 and printed at f64 precision.
    %float9.999990e-06_2620 = torch.constant.float 9.9999997473787516E-6
    %int1600_2621 = torch.constant.int 1600
    %3394 = torch.prim.ListConstruct %int1600_2621 : (!torch.int) -> !torch.list<int>
    // Only %result0_2622 is consumed in this section.
    %result0_2622, %result1_2623, %result2_2624 = torch.aten.native_layer_norm %3393, %3394, %386, %387, %float9.999990e-06_2620 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal; not used by any op in this section.
    %3395 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2625 = torch.constant.int 7
    %int1600_2626 = torch.constant.int 1600
    %3396 = torch.prim.ListConstruct %int7_2625, %int1600_2626 : (!torch.int, !torch.int) -> !torch.list<int>
    %3397 = torch.aten.reshape %result0_2622, %3396 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2627 is not used by any op in this section.
    %int0_2627 = torch.constant.int 0
    %int1_2628 = torch.constant.int 1
    %3398 = torch.aten.mm %3397, %388 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    // Bias add with alpha = 1; the [4800] bias broadcasts over the 7 rows.
    %3399 = torch.aten.add.Tensor %3398, %389, %int1_2628 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal; not used by any op in this section.
    %3400 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2629 = torch.constant.int 1
    %int7_2630 = torch.constant.int 7
    %int4800_2631 = torch.constant.int 4800
    %3401 = torch.prim.ListConstruct %int1_2629, %int7_2630, %int4800_2631 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3402 = torch.aten.reshape %3399, %3401 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three entries, each 1600. Unlike the other shape literals
    // here it IS used -- prim.tolist turns it into the split-size list.
    %3403 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3404 = torch.prim.tolist(%3403) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2632 = torch.constant.int 2
    // Split along dim 2 into 1600 + 1600 + 1600.
    %3405 = torch.aten.split_with_sizes %3402, %3404, %int2_2632 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3406:3 = torch.prim.ListUnpack %3405 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three [1,7,1600] slices of %3406 to [1,7,25,64] and
    // swap dims 1 and 2, giving [1,25,7,64] (presumably heads x positions x
    // head_dim). The same three-step pattern is repeated for #0, #1 and #2.
    // %3414 is not used by any later op visible in this part of the function;
    // its type matches the [1,25,7,64] results in the function signature, so it
    // is presumably one of the returned tensors -- TODO confirm at the return op.
    // Shape literal; not used by any op in this section.
    %3407 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2633 = torch.constant.int 1
    %int7_2634 = torch.constant.int 7
    %int25_2635 = torch.constant.int 25
    %int64_2636 = torch.constant.int 64
    %3408 = torch.prim.ListConstruct %int1_2633, %int7_2634, %int25_2635, %int64_2636 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3409 = torch.aten.reshape %3406#0, %3408 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2637 = torch.constant.int 1
    %int2_2638 = torch.constant.int 2
    // Slice #0 in per-head layout; left operand of the score matmul below.
    %3410 = torch.aten.transpose.int %3409, %int1_2637, %int2_2638 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Shape literal; not used by any op in this section.
    %3411 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2639 = torch.constant.int 1
    %int7_2640 = torch.constant.int 7
    %int25_2641 = torch.constant.int 25
    %int64_2642 = torch.constant.int 64
    %3412 = torch.prim.ListConstruct %int1_2639, %int7_2640, %int25_2641, %int64_2642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3413 = torch.aten.reshape %3406#1, %3412 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2643 = torch.constant.int 1
    %int2_2644 = torch.constant.int 2
    // Slice #1 in per-head layout.
    %3414 = torch.aten.transpose.int %3413, %int1_2643, %int2_2644 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Shape literal; not used by any op in this section.
    %3415 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2645 = torch.constant.int 1
    %int7_2646 = torch.constant.int 7
    %int25_2647 = torch.constant.int 25
    %int64_2648 = torch.constant.int 64
    %3416 = torch.prim.ListConstruct %int1_2645, %int7_2646, %int25_2647, %int64_2648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3417 = torch.aten.reshape %3406#2, %3416 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2649 = torch.constant.int 1
    %int2_2650 = torch.constant.int 2
    // Slice #2 in per-head layout; multiplied by the softmax output later on.
    %3418 = torch.aten.transpose.int %3417, %int1_2649, %int2_2650 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled dot-product scores: %3429 = (%3410 x transpose(%3419, 2, 3)) / 64^0.5,
    // shape [1,25,7,7].
    %int1_2651 = torch.constant.int 1
    %int2_2652 = torch.constant.int 2
    // Same operand (%3413) and same dims (1, 2) as the transpose that produced
    // %3414; the importer emitted it a second time rather than reusing %3414.
    %3419 = torch.aten.transpose.int %3413, %int1_2651, %int2_2652 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2653 = torch.constant.int 2
    %int3_2654 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %3420 = torch.aten.transpose.int %3419, %int2_2653, %int3_2654 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3421 = torch.aten.matmul %3410, %3420 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor 64^0.5 (= 8), computed from scalar literals instead of folded.
    %3422 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3423 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3424 = torch.aten.pow.Tensor_Tensor %3422, %3423 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // Empty shape literal and %int0_2655: neither is used by any op in this section.
    %3425 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2655 = torch.constant.int 0
    // Empty size list -> aten.full builds a rank-0 f32 tensor holding 0.0.
    %3426 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2656 = torch.constant.none
    %float0.000000e00_2657 = torch.constant.float 0.000000e+00
    %3427 = torch.aten.full %3426, %float0.000000e00_2657, %none_2656, %none_2656, %none_2656, %none_2656 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2658 = torch.constant.int 1
    // 0.0 + 64^0.5 (alpha = 1): numerically the same value as %3424.
    %3428 = torch.aten.add.Tensor %3427, %3424, %int1_2658 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3429 = torch.aten.div.Tensor %3421, %3428 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores %3429, softmax over the last axis, and multiply by
    // %3418 to get the per-head output [1,25,7,64].
    // Boolean [1,1,7,7] mask; its contents are elided in this dump. Presumably
    // the causal attention mask -- TODO confirm against the un-elided file.
    %3430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2659 = torch.constant.int 6
    %none_2660 = torch.constant.none
    %false_2661 = torch.constant.bool false
    // dtype code 6 is f32 in PyTorch's ScalarType numbering; operand and result
    // are both f32, so this cast does not change the element type.
    %3431 = torch.aten.to.dtype %3429, %int6_2659, %false_2661, %false_2661, %none_2660 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value: -3.40282347E+38 is the most negative finite f32.
    %3432 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Take %3431 where the mask is true and the fill value elsewhere; the
    // [1,1,7,7] mask broadcasts over the size-25 dim 1.
    %3433 = torch.aten.where.self %3430, %3431, %3432 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2662 = torch.constant.int 3
    %none_2663 = torch.constant.none
    // Softmax along dim 3 (the last axis); dtype argument is none.
    %3434 = torch.aten.softmax.int %3433, %int3_2662, %none_2663 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2664 = torch.constant.int 6
    %none_2665 = torch.constant.none
    %false_2666 = torch.constant.bool false
    // Second f32 -> f32 cast; again no change of element type.
    %3435 = torch.aten.to.dtype %3434, %int6_2664, %false_2666, %false_2666, %none_2665 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Batched matmul: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %3436 = torch.aten.matmul %3435, %3418 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 of %3436 ([1,25,7,64] -> [1,7,25,64]), flatten 25x64 to
    // 1600, apply mm with %390 plus bias %391, and add %3393 (the tensor that
    // was layer-normalized before this attention step).
    // %390/%391 are defined earlier in this function; presumably the attention
    // output-projection weight and bias -- TODO confirm.
    %int1_2667 = torch.constant.int 1
    %int2_2668 = torch.constant.int 2
    %3437 = torch.aten.transpose.int %3436, %int1_2667, %int2_2668 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal; not used by any op in this section (the reshape takes the
    // ListConstruct %3439 instead).
    %3438 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2669 = torch.constant.int 1
    %int7_2670 = torch.constant.int 7
    %int1600_2671 = torch.constant.int 1600
    %3439 = torch.prim.ListConstruct %int1_2669, %int7_2670, %int1600_2671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3440 = torch.aten.reshape %3437, %3439 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Literal holds [-1, 1600] but is not used in this section; the reshape
    // below is given the static list [7, 1600].
    %3441 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2672 = torch.constant.int 7
    %int1600_2673 = torch.constant.int 1600
    %3442 = torch.prim.ListConstruct %int7_2672, %int1600_2673 : (!torch.int, !torch.int) -> !torch.list<int>
    %3443 = torch.aten.reshape %3440, %3442 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2674 is not used by any op in this section.
    %int0_2674 = torch.constant.int 0
    %int1_2675 = torch.constant.int 1
    %3444 = torch.aten.mm %3443, %390 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    // add.Tensor's last operand is alpha (= 1); the [1600] bias broadcasts over
    // the 7 rows.
    %3445 = torch.aten.add.Tensor %3444, %391, %int1_2675 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal; not used by any op in this section.
    %3446 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2676 = torch.constant.int 1
    %int7_2677 = torch.constant.int 7
    %int1600_2678 = torch.constant.int 1600
    %3447 = torch.prim.ListConstruct %int1_2676, %int7_2677, %int1600_2678 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3448 = torch.aten.reshape %3445, %3447 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2679 = torch.constant.int 1
    // Residual sum: %3448 + %3393 (alpha = 1).
    %3449 = torch.aten.add.Tensor %3448, %3393, %int1_2679 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %3449 over its last dim (normalized_shape [1600]) with
    // weight %392 and bias %393, then apply mm with %394 [1600,6400] plus bias
    // %395 to get [1,7,6400]. %392..%395 are defined earlier in this function;
    // %394/%395 are presumably the MLP up-projection parameters -- TODO confirm.
    // eps is 1e-5 rounded to f32 and printed at f64 precision.
    %float9.999990e-06_2680 = torch.constant.float 9.9999997473787516E-6
    %int1600_2681 = torch.constant.int 1600
    %3450 = torch.prim.ListConstruct %int1600_2681 : (!torch.int) -> !torch.list<int>
    // Only %result0_2682 is consumed in this section; the two [1,7,1] results
    // are the per-row statistics that native_layer_norm also returns.
    %result0_2682, %result1_2683, %result2_2684 = torch.aten.native_layer_norm %3449, %3450, %392, %393, %float9.999990e-06_2680 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal; not used by any op in this section.
    %3451 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2685 = torch.constant.int 7
    %int1600_2686 = torch.constant.int 1600
    %3452 = torch.prim.ListConstruct %int7_2685, %int1600_2686 : (!torch.int, !torch.int) -> !torch.list<int>
    %3453 = torch.aten.reshape %result0_2682, %3452 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2687 is not used by any op in this section.
    %int0_2687 = torch.constant.int 0
    %int1_2688 = torch.constant.int 1
    %3454 = torch.aten.mm %3453, %394 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    // Bias add with alpha = 1; the [6400] bias broadcasts over the 7 rows.
    %3455 = torch.aten.add.Tensor %3454, %395, %int1_2688 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal; not used by any op in this section.
    %3456 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2689 = torch.constant.int 1
    %int7_2690 = torch.constant.int 7
    %int6400_2691 = torch.constant.int 6400
    %3457 = torch.prim.ListConstruct %int1_2689, %int7_2690, %int6400_2691 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3458 = torch.aten.reshape %3455, %3457 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise tanh-approximated GELU of x = %3458:
    //   %3471 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // 0.797884583 is sqrt(2/pi) at f32 precision.
    %3459 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %3460 = torch.aten.mul.Tensor %3458, %3459 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3461 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %3462 = torch.aten.pow.Tensor_Tensor %3458, %3461 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3463 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %3464 = torch.aten.mul.Tensor %3462, %3463 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2692 = torch.constant.int 1
    // x + 0.044715 * x^3 (alpha = 1)
    %3465 = torch.aten.add.Tensor %3458, %3464, %int1_2692 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3466 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3467 = torch.aten.mul.Tensor %3465, %3466 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3468 = torch.aten.tanh %3467 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3469 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2693 = torch.constant.int 1
    // 1 + tanh(...)
    %3470 = torch.aten.add.Tensor %3468, %3469, %int1_2693 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // Final product of the two halves of the formula.
    %3471 = torch.aten.mul.Tensor %3460, %3470 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Flatten %3471 to [7,6400], apply mm with %396 [6400,1600] plus bias %397,
    // restore [1,7,1600], and add %3449 (the tensor that was layer-normalized
    // before this MLP). %396/%397 are defined earlier in this function;
    // presumably the MLP down-projection parameters -- TODO confirm.
    // Shape literal; not used by any op in this section.
    %3472 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2694 = torch.constant.int 7
    %int6400_2695 = torch.constant.int 6400
    %3473 = torch.prim.ListConstruct %int7_2694, %int6400_2695 : (!torch.int, !torch.int) -> !torch.list<int>
    %3474 = torch.aten.reshape %3471, %3473 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_2696 is not used by any op in this section.
    %int0_2696 = torch.constant.int 0
    %int1_2697 = torch.constant.int 1
    %3475 = torch.aten.mm %3474, %396 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    // Bias add with alpha = 1; the [1600] bias broadcasts over the 7 rows.
    %3476 = torch.aten.add.Tensor %3475, %397, %int1_2697 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal; not used by any op in this section.
    %3477 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2698 = torch.constant.int 1
    %int7_2699 = torch.constant.int 7
    %int1600_2700 = torch.constant.int 1600
    %3478 = torch.prim.ListConstruct %int1_2698, %int7_2699, %int1600_2700 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3479 = torch.aten.reshape %3476, %3478 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2701 = torch.constant.int 1
    // Residual sum: %3449 + %3479 (alpha = 1).
    %3480 = torch.aten.add.Tensor %3449, %3479, %int1_2701 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %3480 over its last dim with weight %398 and bias %399,
    // apply mm with %400 [1600,4800] plus bias %401, then split the 4800-wide
    // result into three [1,7,1600] tensors along dim 2. %398..%401 are defined
    // earlier in this function. From their later uses, %3493#0/#1/#2 act as
    // query/key/value respectively.
    // eps is 1e-5 rounded to f32 and printed at f64 precision.
    %float9.999990e-06_2702 = torch.constant.float 9.9999997473787516E-6
    %int1600_2703 = torch.constant.int 1600
    %3481 = torch.prim.ListConstruct %int1600_2703 : (!torch.int) -> !torch.list<int>
    // Only %result0_2704 is consumed in this section.
    %result0_2704, %result1_2705, %result2_2706 = torch.aten.native_layer_norm %3480, %3481, %398, %399, %float9.999990e-06_2702 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal; not used by any op in this section.
    %3482 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2707 = torch.constant.int 7
    %int1600_2708 = torch.constant.int 1600
    %3483 = torch.prim.ListConstruct %int7_2707, %int1600_2708 : (!torch.int, !torch.int) -> !torch.list<int>
    %3484 = torch.aten.reshape %result0_2704, %3483 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2709 is not used by any op in this section.
    %int0_2709 = torch.constant.int 0
    %int1_2710 = torch.constant.int 1
    %3485 = torch.aten.mm %3484, %400 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    // Bias add with alpha = 1; the [4800] bias broadcasts over the 7 rows.
    %3486 = torch.aten.add.Tensor %3485, %401, %int1_2710 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal; not used by any op in this section.
    %3487 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2711 = torch.constant.int 1
    %int7_2712 = torch.constant.int 7
    %int4800_2713 = torch.constant.int 4800
    %3488 = torch.prim.ListConstruct %int1_2711, %int7_2712, %int4800_2713 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3489 = torch.aten.reshape %3486, %3488 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three entries, each 1600. Unlike the other shape literals
    // here it IS used -- prim.tolist turns it into the split-size list.
    %3490 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3491 = torch.prim.tolist(%3490) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2714 = torch.constant.int 2
    // Split along dim 2 into 1600 + 1600 + 1600.
    %3492 = torch.aten.split_with_sizes %3489, %3491, %int2_2714 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3493:3 = torch.prim.ListUnpack %3492 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three [1,7,1600] slices of %3493 to [1,7,25,64] and
    // swap dims 1 and 2, giving [1,25,7,64] (presumably heads x positions x
    // head_dim). The same three-step pattern is repeated for #0, #1 and #2.
    // %3501 is not used by any later op visible in this part of the function;
    // its type matches the [1,25,7,64] results in the function signature, so it
    // is presumably one of the returned tensors -- TODO confirm at the return op.
    // Shape literal; not used by any op in this section.
    %3494 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2715 = torch.constant.int 1
    %int7_2716 = torch.constant.int 7
    %int25_2717 = torch.constant.int 25
    %int64_2718 = torch.constant.int 64
    %3495 = torch.prim.ListConstruct %int1_2715, %int7_2716, %int25_2717, %int64_2718 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3496 = torch.aten.reshape %3493#0, %3495 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2719 = torch.constant.int 1
    %int2_2720 = torch.constant.int 2
    // Slice #0 in per-head layout; left operand of the score matmul below.
    %3497 = torch.aten.transpose.int %3496, %int1_2719, %int2_2720 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Shape literal; not used by any op in this section.
    %3498 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2721 = torch.constant.int 1
    %int7_2722 = torch.constant.int 7
    %int25_2723 = torch.constant.int 25
    %int64_2724 = torch.constant.int 64
    %3499 = torch.prim.ListConstruct %int1_2721, %int7_2722, %int25_2723, %int64_2724 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3500 = torch.aten.reshape %3493#1, %3499 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2725 = torch.constant.int 1
    %int2_2726 = torch.constant.int 2
    // Slice #1 in per-head layout.
    %3501 = torch.aten.transpose.int %3500, %int1_2725, %int2_2726 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Shape literal; not used by any op in this section.
    %3502 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2727 = torch.constant.int 1
    %int7_2728 = torch.constant.int 7
    %int25_2729 = torch.constant.int 25
    %int64_2730 = torch.constant.int 64
    %3503 = torch.prim.ListConstruct %int1_2727, %int7_2728, %int25_2729, %int64_2730 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3504 = torch.aten.reshape %3493#2, %3503 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2731 = torch.constant.int 1
    %int2_2732 = torch.constant.int 2
    // Slice #2 in per-head layout; multiplied by the softmax output later on.
    %3505 = torch.aten.transpose.int %3504, %int1_2731, %int2_2732 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled dot-product scores: %3516 = (%3497 x transpose(%3506, 2, 3)) / 64^0.5,
    // shape [1,25,7,7].
    %int1_2733 = torch.constant.int 1
    %int2_2734 = torch.constant.int 2
    // Same operand (%3500) and same dims (1, 2) as the transpose that produced
    // %3501; the importer emitted it a second time rather than reusing %3501.
    %3506 = torch.aten.transpose.int %3500, %int1_2733, %int2_2734 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2735 = torch.constant.int 2
    %int3_2736 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %3507 = torch.aten.transpose.int %3506, %int2_2735, %int3_2736 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3508 = torch.aten.matmul %3497, %3507 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor 64^0.5 (= 8), computed from scalar literals instead of folded.
    %3509 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3510 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3511 = torch.aten.pow.Tensor_Tensor %3509, %3510 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // Empty shape literal and %int0_2737: neither is used by any op in this section.
    %3512 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2737 = torch.constant.int 0
    // Empty size list -> aten.full builds a rank-0 f32 tensor holding 0.0.
    %3513 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2738 = torch.constant.none
    %float0.000000e00_2739 = torch.constant.float 0.000000e+00
    %3514 = torch.aten.full %3513, %float0.000000e00_2739, %none_2738, %none_2738, %none_2738, %none_2738 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2740 = torch.constant.int 1
    // 0.0 + 64^0.5 (alpha = 1): numerically the same value as %3511.
    %3515 = torch.aten.add.Tensor %3514, %3511, %int1_2740 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3516 = torch.aten.div.Tensor %3508, %3515 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask the scaled scores %3516, softmax over the last axis, and multiply by
    // %3505 to get the per-head output [1,25,7,64].
    // Boolean [1,1,7,7] mask; its contents are elided in this dump. Presumably
    // the causal attention mask -- TODO confirm against the un-elided file.
    %3517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2741 = torch.constant.int 6
    %none_2742 = torch.constant.none
    %false_2743 = torch.constant.bool false
    // dtype code 6 is f32 in PyTorch's ScalarType numbering; operand and result
    // are both f32, so this cast does not change the element type.
    %3518 = torch.aten.to.dtype %3516, %int6_2741, %false_2743, %false_2743, %none_2742 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value: -3.40282347E+38 is the most negative finite f32.
    %3519 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Take %3518 where the mask is true and the fill value elsewhere; the
    // [1,1,7,7] mask broadcasts over the size-25 dim 1.
    %3520 = torch.aten.where.self %3517, %3518, %3519 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2744 = torch.constant.int 3
    %none_2745 = torch.constant.none
    // Softmax along dim 3 (the last axis); dtype argument is none.
    %3521 = torch.aten.softmax.int %3520, %int3_2744, %none_2745 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2746 = torch.constant.int 6
    %none_2747 = torch.constant.none
    %false_2748 = torch.constant.bool false
    // Second f32 -> f32 cast; again no change of element type.
    %3522 = torch.aten.to.dtype %3521, %int6_2746, %false_2748, %false_2748, %none_2747 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Batched matmul: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %3523 = torch.aten.matmul %3522, %3505 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 of %3523 ([1,25,7,64] -> [1,7,25,64]), flatten 25x64 to
    // 1600, apply mm with %402 plus bias %403, and add %3480 (the tensor that
    // was layer-normalized before this attention step).
    // %402/%403 are defined earlier in this function; presumably the attention
    // output-projection weight and bias -- TODO confirm.
    %int1_2749 = torch.constant.int 1
    %int2_2750 = torch.constant.int 2
    %3524 = torch.aten.transpose.int %3523, %int1_2749, %int2_2750 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal; not used by any op in this section (the reshape takes the
    // ListConstruct %3526 instead).
    %3525 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2751 = torch.constant.int 1
    %int7_2752 = torch.constant.int 7
    %int1600_2753 = torch.constant.int 1600
    %3526 = torch.prim.ListConstruct %int1_2751, %int7_2752, %int1600_2753 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3527 = torch.aten.reshape %3524, %3526 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Literal holds [-1, 1600] but is not used in this section; the reshape
    // below is given the static list [7, 1600].
    %3528 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2754 = torch.constant.int 7
    %int1600_2755 = torch.constant.int 1600
    %3529 = torch.prim.ListConstruct %int7_2754, %int1600_2755 : (!torch.int, !torch.int) -> !torch.list<int>
    %3530 = torch.aten.reshape %3527, %3529 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2756 is not used by any op in this section.
    %int0_2756 = torch.constant.int 0
    %int1_2757 = torch.constant.int 1
    %3531 = torch.aten.mm %3530, %402 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    // add.Tensor's last operand is alpha (= 1); the [1600] bias broadcasts over
    // the 7 rows.
    %3532 = torch.aten.add.Tensor %3531, %403, %int1_2757 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal; not used by any op in this section.
    %3533 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2758 = torch.constant.int 1
    %int7_2759 = torch.constant.int 7
    %int1600_2760 = torch.constant.int 1600
    %3534 = torch.prim.ListConstruct %int1_2758, %int7_2759, %int1600_2760 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3535 = torch.aten.reshape %3532, %3534 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2761 = torch.constant.int 1
    // Residual sum: %3535 + %3480 (alpha = 1).
    %3536 = torch.aten.add.Tensor %3535, %3480, %int1_2761 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalize %3536 over its last dim (normalized_shape [1600]) with
    // weight %404 and bias %405, then apply mm with %406 [1600,6400] plus bias
    // %407 to get [1,7,6400]. %404..%407 are defined earlier in this function;
    // %406/%407 are presumably the MLP up-projection parameters -- TODO confirm.
    // eps is 1e-5 rounded to f32 and printed at f64 precision.
    %float9.999990e-06_2762 = torch.constant.float 9.9999997473787516E-6
    %int1600_2763 = torch.constant.int 1600
    %3537 = torch.prim.ListConstruct %int1600_2763 : (!torch.int) -> !torch.list<int>
    // Only %result0_2764 is consumed in this section; the two [1,7,1] results
    // are the per-row statistics that native_layer_norm also returns.
    %result0_2764, %result1_2765, %result2_2766 = torch.aten.native_layer_norm %3536, %3537, %404, %405, %float9.999990e-06_2762 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal; not used by any op in this section.
    %3538 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2767 = torch.constant.int 7
    %int1600_2768 = torch.constant.int 1600
    %3539 = torch.prim.ListConstruct %int7_2767, %int1600_2768 : (!torch.int, !torch.int) -> !torch.list<int>
    %3540 = torch.aten.reshape %result0_2764, %3539 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_2769 is not used by any op in this section.
    %int0_2769 = torch.constant.int 0
    %int1_2770 = torch.constant.int 1
    %3541 = torch.aten.mm %3540, %406 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    // Bias add with alpha = 1; the [6400] bias broadcasts over the 7 rows.
    %3542 = torch.aten.add.Tensor %3541, %407, %int1_2770 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal; not used by any op in this section.
    %3543 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2771 = torch.constant.int 1
    %int7_2772 = torch.constant.int 7
    %int6400_2773 = torch.constant.int 6400
    %3544 = torch.prim.ListConstruct %int1_2771, %int7_2772, %int6400_2773 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3545 = torch.aten.reshape %3542, %3544 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    %3546 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3547 = torch.aten.mul.Tensor %3545, %3546 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3548 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3549 = torch.aten.pow.Tensor_Tensor %3545, %3548 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3550 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3551 = torch.aten.mul.Tensor %3549, %3550 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2774 = torch.constant.int 1
    %3552 = torch.aten.add.Tensor %3545, %3551, %int1_2774 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3553 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3554 = torch.aten.mul.Tensor %3552, %3553 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3555 = torch.aten.tanh %3554 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3556 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2775 = torch.constant.int 1
    %3557 = torch.aten.add.Tensor %3555, %3556, %int1_2775 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3558 = torch.aten.mul.Tensor %3547, %3557 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3559 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2776 = torch.constant.int 7
    %int6400_2777 = torch.constant.int 6400
    %3560 = torch.prim.ListConstruct %int7_2776, %int6400_2777 : (!torch.int, !torch.int) -> !torch.list<int>
    %3561 = torch.aten.reshape %3558, %3560 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2778 = torch.constant.int 0
    %int1_2779 = torch.constant.int 1
    %3562 = torch.aten.mm %3561, %408 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3563 = torch.aten.add.Tensor %3562, %409, %int1_2779 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3564 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2780 = torch.constant.int 1
    %int7_2781 = torch.constant.int 7
    %int1600_2782 = torch.constant.int 1600
    %3565 = torch.prim.ListConstruct %int1_2780, %int7_2781, %int1600_2782 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3566 = torch.aten.reshape %3563, %3565 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2783 = torch.constant.int 1
    %3567 = torch.aten.add.Tensor %3536, %3566, %int1_2783 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm + multi-head self-attention + residual add, producing %3623 ----
    // Layer norm of %3567 over the last dimension (normalized shape [1600]) with
    // weight %410, bias %411 and eps ~= 1e-5. Only %result0_2786 is used in this
    // section; the two [1,7,1] results are not referenced here.
    // NOTE(review): %410..%415 are defined outside this excerpt; presumably the
    // layer-norm and attention parameters of this layer -- confirm.
    %float9.999990e-06_2784 = torch.constant.float 9.9999997473787516E-6
    %int1600_2785 = torch.constant.int 1600
    %3568 = torch.prim.ListConstruct %int1600_2785 : (!torch.int) -> !torch.list<int>
    %result0_2786, %result1_2787, %result2_2788 = torch.aten.native_layer_norm %3567, %3568, %410, %411, %float9.999990e-06_2784 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused projection: flatten to [7,1600], matmul with %412 ([1600,4800]),
    // add bias %413 (alpha = 1), reshape to [1,7,4800].
    // The shape literals %3569 / %3574 and %int0_2791 are not referenced in this
    // section; the reshapes use the explicit lists %3570 / %3575.
    %3569 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2789 = torch.constant.int 7
    %int1600_2790 = torch.constant.int 1600
    %3570 = torch.prim.ListConstruct %int7_2789, %int1600_2790 : (!torch.int, !torch.int) -> !torch.list<int>
    %3571 = torch.aten.reshape %result0_2786, %3570 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2791 = torch.constant.int 0
    %int1_2792 = torch.constant.int 1
    %3572 = torch.aten.mm %3571, %412 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3573 = torch.aten.add.Tensor %3572, %413, %int1_2792 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3574 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2793 = torch.constant.int 1
    %int7_2794 = torch.constant.int 7
    %int4800_2795 = torch.constant.int 4800
    %3575 = torch.prim.ListConstruct %int1_2793, %int7_2794, %int4800_2795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3576 = torch.aten.reshape %3573, %3575 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split the 4800-wide last dimension (dim 2) into three 1600-wide pieces.
    // dense<1600> : tensor<3xsi64> is a splat, i.e. sizes [1600, 1600, 1600].
    // Below, piece #0 is the left operand of the score matmul (query role),
    // piece #1 is transposed into the right operand (key role), and piece #2 is
    // multiplied by the softmax output (value role).
    %3577 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3578 = torch.prim.tolist(%3577) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2796 = torch.constant.int 2
    %3579 = torch.aten.split_with_sizes %3576, %3578, %int2_2796 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3580:3 = torch.prim.ListUnpack %3579 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Piece #0: split the 1600 features into 25 x 64 ([1,7,25,64]) and swap dims
    // 1 and 2 to get [1,25,7,64]. %3581 (shape literal) is not referenced here.
    %3581 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2797 = torch.constant.int 1
    %int7_2798 = torch.constant.int 7
    %int25_2799 = torch.constant.int 25
    %int64_2800 = torch.constant.int 64
    %3582 = torch.prim.ListConstruct %int1_2797, %int7_2798, %int25_2799, %int64_2800 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3583 = torch.aten.reshape %3580#0, %3582 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2801 = torch.constant.int 1
    %int2_2802 = torch.constant.int 2
    %3584 = torch.aten.transpose.int %3583, %int1_2801, %int2_2802 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Piece #1: same reshape + transpose, giving %3588 of shape [1,25,7,64].
    // %3588 is not referenced again in this section.
    // NOTE(review): presumably consumed outside this excerpt (the function
    // returns many [1,25,7,64] tensors) -- confirm before treating it as dead.
    %3585 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2803 = torch.constant.int 1
    %int7_2804 = torch.constant.int 7
    %int25_2805 = torch.constant.int 25
    %int64_2806 = torch.constant.int 64
    %3586 = torch.prim.ListConstruct %int1_2803, %int7_2804, %int25_2805, %int64_2806 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3587 = torch.aten.reshape %3580#1, %3586 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2807 = torch.constant.int 1
    %int2_2808 = torch.constant.int 2
    %3588 = torch.aten.transpose.int %3587, %int1_2807, %int2_2808 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Piece #2: same reshape + transpose, giving %3592 of shape [1,25,7,64].
    %3589 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2809 = torch.constant.int 1
    %int7_2810 = torch.constant.int 7
    %int25_2811 = torch.constant.int 25
    %int64_2812 = torch.constant.int 64
    %3590 = torch.prim.ListConstruct %int1_2809, %int7_2810, %int25_2811, %int64_2812 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3591 = torch.aten.reshape %3580#2, %3590 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2813 = torch.constant.int 1
    %int2_2814 = torch.constant.int 2
    %3592 = torch.aten.transpose.int %3591, %int1_2813, %int2_2814 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention scores. %3593 repeats the transpose that produced %3588 (same
    // operand %3587, same dims); %3594 then swaps dims 2 and 3 to [1,25,64,7] so
    // that %3595 = %3584 x %3594 has shape [1,25,7,7].
    %int1_2815 = torch.constant.int 1
    %int2_2816 = torch.constant.int 2
    %3593 = torch.aten.transpose.int %3587, %int1_2815, %int2_2816 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2817 = torch.constant.int 2
    %int3_2818 = torch.constant.int 3
    %3594 = torch.aten.transpose.int %3593, %int2_2817, %int3_2818 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %3595 = torch.aten.matmul %3584, %3594 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: %3598 = 64^0.5 (= 8). It is added to a rank-0 zero tensor
    // built by aten.full (%3601) to form %3602, and the scores are divided by it.
    // %3599 and %int0_2819 are not referenced in this section.
    %3596 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3597 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3598 = torch.aten.pow.Tensor_Tensor %3596, %3597 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %3599 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2819 = torch.constant.int 0
    %3600 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2820 = torch.constant.none
    %float0.000000e00_2821 = torch.constant.float 0.000000e+00
    %3601 = torch.aten.full %3600, %float0.000000e00_2821, %none_2820, %none_2820, %none_2820, %none_2820 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2822 = torch.constant.int 1
    %3602 = torch.aten.add.Tensor %3601, %3598, %int1_2822 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3603 = torch.aten.div.Tensor %3595, %3602 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Masking: where the [1,1,7,7] boolean mask %3604 is true the score is kept,
    // otherwise it is replaced by -3.40282347E+38 (the most negative finite f32).
    // NOTE(review): the mask contents are elided in this dump; presumably a
    // causal (lower-triangular) mask -- confirm against the unelided file.
    %3604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the
    // element type does not change.
    %int6_2823 = torch.constant.int 6
    %none_2824 = torch.constant.none
    %false_2825 = torch.constant.bool false
    %3605 = torch.aten.to.dtype %3603, %int6_2823, %false_2825, %false_2825, %none_2824 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %3606 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3607 = torch.aten.where.self %3604, %3605, %3606 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension), followed by another f32 -> f32
    // to.dtype.
    %int3_2826 = torch.constant.int 3
    %none_2827 = torch.constant.none
    %3608 = torch.aten.softmax.int %3607, %int3_2826, %none_2827 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2828 = torch.constant.int 6
    %none_2829 = torch.constant.none
    %false_2830 = torch.constant.bool false
    %3609 = torch.aten.to.dtype %3608, %int6_2828, %false_2830, %false_2830, %none_2829 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64]; then swap dims 1 and
    // 2 back to [1,7,25,64] and merge the 25 x 64 dims into 1600.
    %3610 = torch.aten.matmul %3609, %3592 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2831 = torch.constant.int 1
    %int2_2832 = torch.constant.int 2
    %3611 = torch.aten.transpose.int %3610, %int1_2831, %int2_2832 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3612 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2833 = torch.constant.int 1
    %int7_2834 = torch.constant.int 7
    %int1600_2835 = torch.constant.int 1600
    %3613 = torch.prim.ListConstruct %int1_2833, %int7_2834, %int1600_2835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3614 = torch.aten.reshape %3611, %3613 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Output projection: flatten to [7,1600], matmul with %414 ([1600,1600]),
    // add bias %415 (alpha = 1), reshape to [1,7,1600].
    // %3615, %int0_2838 and %3620 are not referenced in this section.
    %3615 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2836 = torch.constant.int 7
    %int1600_2837 = torch.constant.int 1600
    %3616 = torch.prim.ListConstruct %int7_2836, %int1600_2837 : (!torch.int, !torch.int) -> !torch.list<int>
    %3617 = torch.aten.reshape %3614, %3616 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2838 = torch.constant.int 0
    %int1_2839 = torch.constant.int 1
    %3618 = torch.aten.mm %3617, %414 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3619 = torch.aten.add.Tensor %3618, %415, %int1_2839 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3620 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2840 = torch.constant.int 1
    %int7_2841 = torch.constant.int 7
    %int1600_2842 = torch.constant.int 1600
    %3621 = torch.prim.ListConstruct %int1_2840, %int7_2841, %int1600_2842 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3622 = torch.aten.reshape %3619, %3621 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add with alpha = 1: %3623 = %3622 + %3567 (the layer-norm input).
    %int1_2843 = torch.constant.int 1
    %3623 = torch.aten.add.Tensor %3622, %3567, %int1_2843 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm + feed-forward (MLP) + residual add, producing %3654 ----
    // Layer norm of %3623 over the last dimension (normalized shape [1600]) with
    // weight %416, bias %417 and eps ~= 1e-5. Only the first result
    // (%result0_2846) is used in this section; the two [1,7,1] results are not
    // referenced here.
    // NOTE(review): %416..%421 are defined outside this excerpt; presumably the
    // layer-norm and MLP parameters of this layer -- confirm.
    %float9.999990e-06_2844 = torch.constant.float 9.9999997473787516E-6
    %int1600_2845 = torch.constant.int 1600
    %3624 = torch.prim.ListConstruct %int1600_2845 : (!torch.int) -> !torch.list<int>
    %result0_2846, %result1_2847, %result2_2848 = torch.aten.native_layer_norm %3623, %3624, %416, %417, %float9.999990e-06_2844 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // First linear layer: flatten to [7,1600], matmul with %418 ([1600,6400]),
    // add bias %419 (alpha = 1), reshape to [1,7,6400].
    // The shape literals %3625 / %3630 and %int0_2851 are not referenced in this
    // section; the reshapes use the explicit lists %3626 / %3631.
    %3625 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2849 = torch.constant.int 7
    %int1600_2850 = torch.constant.int 1600
    %3626 = torch.prim.ListConstruct %int7_2849, %int1600_2850 : (!torch.int, !torch.int) -> !torch.list<int>
    %3627 = torch.aten.reshape %result0_2846, %3626 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2851 = torch.constant.int 0
    %int1_2852 = torch.constant.int 1
    %3628 = torch.aten.mm %3627, %418 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3629 = torch.aten.add.Tensor %3628, %419, %int1_2852 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3630 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2853 = torch.constant.int 1
    %int7_2854 = torch.constant.int 7
    %int6400_2855 = torch.constant.int 6400
    %3631 = torch.prim.ListConstruct %int1_2853, %int7_2854, %int6400_2855 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3632 = torch.aten.reshape %3629, %3631 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // GELU activation (tanh approximation), spelled out op by op with x = %3632:
    //   %3645 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // 0.797884583 ~= sqrt(2/pi).
    // %3634 = 0.5 * x
    %3633 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3634 = torch.aten.mul.Tensor %3632, %3633 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %3636 = x^3
    %3635 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3636 = torch.aten.pow.Tensor_Tensor %3632, %3635 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %3639 = x + 0.044715 * x^3
    %3637 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3638 = torch.aten.mul.Tensor %3636, %3637 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2856 = torch.constant.int 1
    %3639 = torch.aten.add.Tensor %3632, %3638, %int1_2856 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %3642 = tanh(0.797884583 * %3639)
    %3640 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3641 = torch.aten.mul.Tensor %3639, %3640 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3642 = torch.aten.tanh %3641 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %3645 = %3634 * (1 + %3642)
    %3643 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2857 = torch.constant.int 1
    %3644 = torch.aten.add.Tensor %3642, %3643, %int1_2857 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3645 = torch.aten.mul.Tensor %3634, %3644 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Second linear layer: flatten to [7,6400], matmul with %420 ([6400,1600]),
    // add bias %421 (alpha = 1), reshape to [1,7,1600].
    // %3646, %int0_2860 and %3651 are not referenced in this section.
    %3646 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2858 = torch.constant.int 7
    %int6400_2859 = torch.constant.int 6400
    %3647 = torch.prim.ListConstruct %int7_2858, %int6400_2859 : (!torch.int, !torch.int) -> !torch.list<int>
    %3648 = torch.aten.reshape %3645, %3647 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2860 = torch.constant.int 0
    %int1_2861 = torch.constant.int 1
    %3649 = torch.aten.mm %3648, %420 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3650 = torch.aten.add.Tensor %3649, %421, %int1_2861 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3651 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2862 = torch.constant.int 1
    %int7_2863 = torch.constant.int 7
    %int1600_2864 = torch.constant.int 1600
    %3652 = torch.prim.ListConstruct %int1_2862, %int7_2863, %int1600_2864 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3653 = torch.aten.reshape %3650, %3652 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add with alpha = 1: %3654 = %3623 (the layer-norm input) + %3653.
    %int1_2865 = torch.constant.int 1
    %3654 = torch.aten.add.Tensor %3623, %3653, %int1_2865 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm + multi-head self-attention + residual add, producing %3710 ----
    // Layer norm of %3654 over the last dimension (normalized shape [1600]) with
    // weight %422, bias %423 and eps ~= 1e-5. Only %result0_2868 is used in this
    // section; the two [1,7,1] results are not referenced here.
    // NOTE(review): %422..%427 are defined outside this excerpt; presumably the
    // layer-norm and attention parameters of this layer -- confirm.
    %float9.999990e-06_2866 = torch.constant.float 9.9999997473787516E-6
    %int1600_2867 = torch.constant.int 1600
    %3655 = torch.prim.ListConstruct %int1600_2867 : (!torch.int) -> !torch.list<int>
    %result0_2868, %result1_2869, %result2_2870 = torch.aten.native_layer_norm %3654, %3655, %422, %423, %float9.999990e-06_2866 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused projection: flatten to [7,1600], matmul with %424 ([1600,4800]),
    // add bias %425 (alpha = 1), reshape to [1,7,4800].
    // The shape literals %3656 / %3661 and %int0_2873 are not referenced in this
    // section; the reshapes use the explicit lists %3657 / %3662.
    %3656 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2871 = torch.constant.int 7
    %int1600_2872 = torch.constant.int 1600
    %3657 = torch.prim.ListConstruct %int7_2871, %int1600_2872 : (!torch.int, !torch.int) -> !torch.list<int>
    %3658 = torch.aten.reshape %result0_2868, %3657 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2873 = torch.constant.int 0
    %int1_2874 = torch.constant.int 1
    %3659 = torch.aten.mm %3658, %424 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3660 = torch.aten.add.Tensor %3659, %425, %int1_2874 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3661 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2875 = torch.constant.int 1
    %int7_2876 = torch.constant.int 7
    %int4800_2877 = torch.constant.int 4800
    %3662 = torch.prim.ListConstruct %int1_2875, %int7_2876, %int4800_2877 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3663 = torch.aten.reshape %3660, %3662 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split the 4800-wide last dimension (dim 2) into three 1600-wide pieces.
    // dense<1600> : tensor<3xsi64> is a splat, i.e. sizes [1600, 1600, 1600].
    // Below, piece #0 is the left operand of the score matmul (query role),
    // piece #1 is transposed into the right operand (key role), and piece #2 is
    // multiplied by the softmax output (value role).
    %3664 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3665 = torch.prim.tolist(%3664) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2878 = torch.constant.int 2
    %3666 = torch.aten.split_with_sizes %3663, %3665, %int2_2878 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3667:3 = torch.prim.ListUnpack %3666 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Piece #0: split the 1600 features into 25 x 64 ([1,7,25,64]) and swap dims
    // 1 and 2 to get [1,25,7,64]. %3668 (shape literal) is not referenced here.
    %3668 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2879 = torch.constant.int 1
    %int7_2880 = torch.constant.int 7
    %int25_2881 = torch.constant.int 25
    %int64_2882 = torch.constant.int 64
    %3669 = torch.prim.ListConstruct %int1_2879, %int7_2880, %int25_2881, %int64_2882 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3670 = torch.aten.reshape %3667#0, %3669 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2883 = torch.constant.int 1
    %int2_2884 = torch.constant.int 2
    %3671 = torch.aten.transpose.int %3670, %int1_2883, %int2_2884 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Piece #1: same reshape + transpose, giving %3675 of shape [1,25,7,64].
    // %3675 is not referenced again in this section.
    // NOTE(review): presumably consumed outside this excerpt (the function
    // returns many [1,25,7,64] tensors) -- confirm before treating it as dead.
    %3672 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2885 = torch.constant.int 1
    %int7_2886 = torch.constant.int 7
    %int25_2887 = torch.constant.int 25
    %int64_2888 = torch.constant.int 64
    %3673 = torch.prim.ListConstruct %int1_2885, %int7_2886, %int25_2887, %int64_2888 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3674 = torch.aten.reshape %3667#1, %3673 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2889 = torch.constant.int 1
    %int2_2890 = torch.constant.int 2
    %3675 = torch.aten.transpose.int %3674, %int1_2889, %int2_2890 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Piece #2: same reshape + transpose, giving %3679 of shape [1,25,7,64].
    %3676 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2891 = torch.constant.int 1
    %int7_2892 = torch.constant.int 7
    %int25_2893 = torch.constant.int 25
    %int64_2894 = torch.constant.int 64
    %3677 = torch.prim.ListConstruct %int1_2891, %int7_2892, %int25_2893, %int64_2894 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3678 = torch.aten.reshape %3667#2, %3677 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2895 = torch.constant.int 1
    %int2_2896 = torch.constant.int 2
    %3679 = torch.aten.transpose.int %3678, %int1_2895, %int2_2896 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention scores. %3680 repeats the transpose that produced %3675 (same
    // operand %3674, same dims); %3681 then swaps dims 2 and 3 to [1,25,64,7] so
    // that %3682 = %3671 x %3681 has shape [1,25,7,7].
    %int1_2897 = torch.constant.int 1
    %int2_2898 = torch.constant.int 2
    %3680 = torch.aten.transpose.int %3674, %int1_2897, %int2_2898 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2899 = torch.constant.int 2
    %int3_2900 = torch.constant.int 3
    %3681 = torch.aten.transpose.int %3680, %int2_2899, %int3_2900 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %3682 = torch.aten.matmul %3671, %3681 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: %3685 = 64^0.5 (= 8). It is added to a rank-0 zero tensor
    // built by aten.full (%3688) to form %3689, and the scores are divided by it.
    // %3686 and %int0_2901 are not referenced in this section.
    %3683 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3684 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3685 = torch.aten.pow.Tensor_Tensor %3683, %3684 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %3686 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2901 = torch.constant.int 0
    %3687 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2902 = torch.constant.none
    %float0.000000e00_2903 = torch.constant.float 0.000000e+00
    %3688 = torch.aten.full %3687, %float0.000000e00_2903, %none_2902, %none_2902, %none_2902, %none_2902 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2904 = torch.constant.int 1
    %3689 = torch.aten.add.Tensor %3688, %3685, %int1_2904 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3690 = torch.aten.div.Tensor %3682, %3689 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Masking: where the [1,1,7,7] boolean mask %3691 is true the score is kept,
    // otherwise it is replaced by -3.40282347E+38 (the most negative finite f32).
    // NOTE(review): the mask contents are elided in this dump; presumably a
    // causal (lower-triangular) mask -- confirm against the unelided file.
    %3691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32 here, so the
    // element type does not change.
    %int6_2905 = torch.constant.int 6
    %none_2906 = torch.constant.none
    %false_2907 = torch.constant.bool false
    %3692 = torch.aten.to.dtype %3690, %int6_2905, %false_2907, %false_2907, %none_2906 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %3693 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3694 = torch.aten.where.self %3691, %3692, %3693 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension), followed by another f32 -> f32
    // to.dtype.
    %int3_2908 = torch.constant.int 3
    %none_2909 = torch.constant.none
    %3695 = torch.aten.softmax.int %3694, %int3_2908, %none_2909 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2910 = torch.constant.int 6
    %none_2911 = torch.constant.none
    %false_2912 = torch.constant.bool false
    %3696 = torch.aten.to.dtype %3695, %int6_2910, %false_2912, %false_2912, %none_2911 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64]; then swap dims 1 and
    // 2 back to [1,7,25,64] and merge the 25 x 64 dims into 1600.
    %3697 = torch.aten.matmul %3696, %3679 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_2913 = torch.constant.int 1
    %int2_2914 = torch.constant.int 2
    %3698 = torch.aten.transpose.int %3697, %int1_2913, %int2_2914 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3699 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2915 = torch.constant.int 1
    %int7_2916 = torch.constant.int 7
    %int1600_2917 = torch.constant.int 1600
    %3700 = torch.prim.ListConstruct %int1_2915, %int7_2916, %int1600_2917 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3701 = torch.aten.reshape %3698, %3700 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Output projection: flatten to [7,1600], matmul with %426 ([1600,1600]),
    // add bias %427 (alpha = 1), reshape to [1,7,1600].
    // %3702, %int0_2920 and %3707 are not referenced in this section.
    %3702 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2918 = torch.constant.int 7
    %int1600_2919 = torch.constant.int 1600
    %3703 = torch.prim.ListConstruct %int7_2918, %int1600_2919 : (!torch.int, !torch.int) -> !torch.list<int>
    %3704 = torch.aten.reshape %3701, %3703 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2920 = torch.constant.int 0
    %int1_2921 = torch.constant.int 1
    %3705 = torch.aten.mm %3704, %426 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3706 = torch.aten.add.Tensor %3705, %427, %int1_2921 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3707 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2922 = torch.constant.int 1
    %int7_2923 = torch.constant.int 7
    %int1600_2924 = torch.constant.int 1600
    %3708 = torch.prim.ListConstruct %int1_2922, %int7_2923, %int1600_2924 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3709 = torch.aten.reshape %3706, %3708 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add with alpha = 1: %3710 = %3709 + %3654 (the layer-norm input).
    %int1_2925 = torch.constant.int 1
    %3710 = torch.aten.add.Tensor %3709, %3654, %int1_2925 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    %float9.999990e-06_2926 = torch.constant.float 9.9999997473787516E-6
    %int1600_2927 = torch.constant.int 1600
    %3711 = torch.prim.ListConstruct %int1600_2927 : (!torch.int) -> !torch.list<int>
    %result0_2928, %result1_2929, %result2_2930 = torch.aten.native_layer_norm %3710, %3711, %428, %429, %float9.999990e-06_2926 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %3712 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2931 = torch.constant.int 7
    %int1600_2932 = torch.constant.int 1600
    %3713 = torch.prim.ListConstruct %int7_2931, %int1600_2932 : (!torch.int, !torch.int) -> !torch.list<int>
    %3714 = torch.aten.reshape %result0_2928, %3713 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2933 = torch.constant.int 0
    %int1_2934 = torch.constant.int 1
    %3715 = torch.aten.mm %3714, %430 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3716 = torch.aten.add.Tensor %3715, %431, %int1_2934 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3717 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2935 = torch.constant.int 1
    %int7_2936 = torch.constant.int 7
    %int6400_2937 = torch.constant.int 6400
    %3718 = torch.prim.ListConstruct %int1_2935, %int7_2936, %int6400_2937 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3719 = torch.aten.reshape %3716, %3718 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %3719, spelled out op by op:
    //   %3732 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // i.e. the tanh form of GELU.
    %3720 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %3721 = torch.aten.mul.Tensor %3719, %3720 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3722 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %3723 = torch.aten.pow.Tensor_Tensor %3719, %3722 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3724 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %3725 = torch.aten.mul.Tensor %3723, %3724 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_2938 = torch.constant.int 1
    // x + 0.044715 * x^3 (alpha = 1)
    %3726 = torch.aten.add.Tensor %3719, %3725, %int1_2938 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is approximately sqrt(2/pi).
    %3727 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3728 = torch.aten.mul.Tensor %3726, %3727 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3729 = torch.aten.tanh %3728 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3730 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_2939 = torch.constant.int 1
    // 1 + tanh(...)
    %3731 = torch.aten.add.Tensor %3729, %3730, %int1_2939 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %3732 = torch.aten.mul.Tensor %3721, %3731 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 6400 -> 1600 linear projection of %3732, then residual add with %3710.
    // %3733, %int0_2942 and %3738 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %3733 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2940 = torch.constant.int 7
    %int6400_2941 = torch.constant.int 6400
    %3734 = torch.prim.ListConstruct %int7_2940, %int6400_2941 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,6400] for mm.
    %3735 = torch.aten.reshape %3732, %3734 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_2942 = torch.constant.int 0
    %int1_2943 = torch.constant.int 1
    // [7,6400] x [6400,1600] + bias %433 (alpha = 1).
    %3736 = torch.aten.mm %3735, %432 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3737 = torch.aten.add.Tensor %3736, %433, %int1_2943 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3738 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2944 = torch.constant.int 1
    %int7_2945 = torch.constant.int 7
    %int1600_2946 = torch.constant.int 1600
    %3739 = torch.prim.ListConstruct %int1_2944, %int7_2945, %int1600_2946 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to [1,7,1600].
    %3740 = torch.aten.reshape %3737, %3739 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_2947 = torch.constant.int 1
    // Residual connection: %3741 = %3710 + %3740.
    %3741 = torch.aten.add.Tensor %3710, %3740, %int1_2947 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3741, a fused 1600 -> 4800 linear projection, and a split of the result into three 1600-wide chunks.
    // %3743, %int0_2955 and %3748 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    // eps is approximately 1e-5.
    %float9.999990e-06_2948 = torch.constant.float 9.9999997473787516E-6
    %int1600_2949 = torch.constant.int 1600
    %3742 = torch.prim.ListConstruct %int1600_2949 : (!torch.int) -> !torch.list<int>
    // Normalize over the trailing dimension of size 1600; %434 / %435 are in the weight / bias positions.
    // Only %result0_2950 is consumed in the visible part of the function.
    %result0_2950, %result1_2951, %result2_2952 = torch.aten.native_layer_norm %3741, %3742, %434, %435, %float9.999990e-06_2948 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %3743 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_2953 = torch.constant.int 7
    %int1600_2954 = torch.constant.int 1600
    %3744 = torch.prim.ListConstruct %int7_2953, %int1600_2954 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3745 = torch.aten.reshape %result0_2950, %3744 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_2955 = torch.constant.int 0
    %int1_2956 = torch.constant.int 1
    // [7,1600] x [1600,4800] + bias %437 (alpha = 1).
    %3746 = torch.aten.mm %3745, %436 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3747 = torch.aten.add.Tensor %3746, %437, %int1_2956 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3748 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2957 = torch.constant.int 1
    %int7_2958 = torch.constant.int 7
    %int4800_2959 = torch.constant.int 4800
    %3749 = torch.prim.ListConstruct %int1_2957, %int7_2958, %int4800_2959 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3750 = torch.aten.reshape %3747, %3749 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes, each 1600.
    %3751 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3752 = torch.prim.tolist(%3751) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_2960 = torch.constant.int 2
    // Split along dim 2 into three [1,7,1600] tensors. Later ops use #0 as the left operand of the
    // score matmul, #1 (transposed) as its right operand, and #2 as the operand weighted by the softmax output.
    %3753 = torch.aten.split_with_sizes %3750, %3752, %int2_2960 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3754:3 = torch.prim.ListUnpack %3753 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three split chunks from [1,7,1600] to [1,7,25,64] and swap dims 1 and 2,
    // giving [1,25,7,64] (25 groups of width 64; 25 * 64 = 1600).
    // The shape literals %3755, %3759 and %3763 have no uses in the visible part of the function
    // (possibly importer leftovers - TODO confirm); the reshapes take their shape from the ListConstruct values.
    %3755 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2961 = torch.constant.int 1
    %int7_2962 = torch.constant.int 7
    %int25_2963 = torch.constant.int 25
    %int64_2964 = torch.constant.int 64
    %3756 = torch.prim.ListConstruct %int1_2961, %int7_2962, %int25_2963, %int64_2964 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #0 -> %3758.
    %3757 = torch.aten.reshape %3754#0, %3756 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2965 = torch.constant.int 1
    %int2_2966 = torch.constant.int 2
    %3758 = torch.aten.transpose.int %3757, %int1_2965, %int2_2966 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %3759 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2967 = torch.constant.int 1
    %int7_2968 = torch.constant.int 7
    %int25_2969 = torch.constant.int 25
    %int64_2970 = torch.constant.int 64
    %3760 = torch.prim.ListConstruct %int1_2967, %int7_2968, %int25_2969, %int64_2970 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #1 -> %3762. NOTE(review): %3762 has no use in the visible part of the function;
    // its shape matches the function's [1,25,7,64] results, so it may be returned - confirm.
    %3761 = torch.aten.reshape %3754#1, %3760 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2971 = torch.constant.int 1
    %int2_2972 = torch.constant.int 2
    %3762 = torch.aten.transpose.int %3761, %int1_2971, %int2_2972 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %3763 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_2973 = torch.constant.int 1
    %int7_2974 = torch.constant.int 7
    %int25_2975 = torch.constant.int 25
    %int64_2976 = torch.constant.int 64
    %3764 = torch.prim.ListConstruct %int1_2973, %int7_2974, %int25_2975, %int64_2976 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #2 -> %3766.
    %3765 = torch.aten.reshape %3754#2, %3764 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_2977 = torch.constant.int 1
    %int2_2978 = torch.constant.int 2
    %3766 = torch.aten.transpose.int %3765, %int1_2977, %int2_2978 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %3758 x transpose(%3761-derived tensor), divided by 64^0.5, masked,
    // softmax over the last dim, then multiplied with %3766.
    // %3773 and %int0_2983 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %int1_2979 = torch.constant.int 1
    %int2_2980 = torch.constant.int 2
    // Same operand (%3761) and same dims (1, 2) as the transpose that produced %3762.
    %3767 = torch.aten.transpose.int %3761, %int1_2979, %int2_2980 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_2981 = torch.constant.int 2
    %int3_2982 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the width-64 dim.
    %3768 = torch.aten.transpose.int %3767, %int2_2981, %int3_2982 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3769 = torch.aten.matmul %3758, %3768 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64^0.5, computed at runtime as pow(64, 0.5) and then added to a zero scalar created by aten.full.
    %3770 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3771 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3772 = torch.aten.pow.Tensor_Tensor %3770, %3771 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %3773 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_2983 = torch.constant.int 0
    // Empty size list -> rank-0 tensor.
    %3774 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_2984 = torch.constant.none
    %float0.000000e00_2985 = torch.constant.float 0.000000e+00
    %3775 = torch.aten.full %3774, %float0.000000e00_2985, %none_2984, %none_2984, %none_2984, %none_2984 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_2986 = torch.constant.int 1
    %3776 = torch.aten.add.Tensor %3775, %3772, %int1_2986 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3777 = torch.aten.div.Tensor %3769, %3776 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump (presumably the causal mask - TODO confirm).
    %3778 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_2987 = torch.constant.int 6
    %none_2988 = torch.constant.none
    %false_2989 = torch.constant.bool false
    // Cast with dtype code 6; operand and result types are both f32.
    %3779 = torch.aten.to.dtype %3777, %int6_2987, %false_2989, %false_2989, %none_2988 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value for masked-out positions: the most negative finite f32.
    %3780 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Keep the score where the mask is true, otherwise use the fill value (mask broadcasts over dim 1).
    %3781 = torch.aten.where.self %3778, %3779, %3780 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_2990 = torch.constant.int 3
    %none_2991 = torch.constant.none
    // Softmax over dim 3.
    %3782 = torch.aten.softmax.int %3781, %int3_2990, %none_2991 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_2992 = torch.constant.int 6
    %none_2993 = torch.constant.none
    %false_2994 = torch.constant.bool false
    // Second cast with dtype code 6; again f32 -> f32.
    %3783 = torch.aten.to.dtype %3782, %int6_2992, %false_2994, %false_2994, %none_2993 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %3784 = torch.aten.matmul %3783, %3766 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25 x 64 groups of %3784 back into a 1600-wide tensor, apply a 1600 -> 1600 linear
    // projection, and add the residual %3741.
    // %3786, %3789, %int0_3002 and %3794 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %int1_2995 = torch.constant.int 1
    %int2_2996 = torch.constant.int 2
    // [1,25,7,64] -> [1,7,25,64].
    %3785 = torch.aten.transpose.int %3784, %int1_2995, %int2_2996 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3786 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_2997 = torch.constant.int 1
    %int7_2998 = torch.constant.int 7
    %int1600_2999 = torch.constant.int 1600
    %3787 = torch.prim.ListConstruct %int1_2997, %int7_2998, %int1600_2999 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [1,7,25,64] -> [1,7,1600].
    %3788 = torch.aten.reshape %3785, %3787 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %3789 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3000 = torch.constant.int 7
    %int1600_3001 = torch.constant.int 1600
    %3790 = torch.prim.ListConstruct %int7_3000, %int1600_3001 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3791 = torch.aten.reshape %3788, %3790 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3002 = torch.constant.int 0
    %int1_3003 = torch.constant.int 1
    // [7,1600] x [1600,1600] + bias %439 (alpha = 1).
    %3792 = torch.aten.mm %3791, %438 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3793 = torch.aten.add.Tensor %3792, %439, %int1_3003 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3794 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3004 = torch.constant.int 1
    %int7_3005 = torch.constant.int 7
    %int1600_3006 = torch.constant.int 1600
    %3795 = torch.prim.ListConstruct %int1_3004, %int7_3005, %int1600_3006 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3796 = torch.aten.reshape %3793, %3795 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3007 = torch.constant.int 1
    // Residual connection: %3797 = %3796 + %3741.
    %3797 = torch.aten.add.Tensor %3796, %3741, %int1_3007 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3797 followed by a 1600 -> 6400 linear projection.
    // %3799, %int0_3015 and %3804 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    // eps is approximately 1e-5.
    %float9.999990e-06_3008 = torch.constant.float 9.9999997473787516E-6
    %int1600_3009 = torch.constant.int 1600
    %3798 = torch.prim.ListConstruct %int1600_3009 : (!torch.int) -> !torch.list<int>
    // Normalize over the trailing dimension of size 1600; %440 / %441 are in the weight / bias positions.
    // Only %result0_3010 is consumed in the visible part of the function.
    %result0_3010, %result1_3011, %result2_3012 = torch.aten.native_layer_norm %3797, %3798, %440, %441, %float9.999990e-06_3008 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %3799 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3013 = torch.constant.int 7
    %int1600_3014 = torch.constant.int 1600
    %3800 = torch.prim.ListConstruct %int7_3013, %int1600_3014 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3801 = torch.aten.reshape %result0_3010, %3800 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3015 = torch.constant.int 0
    %int1_3016 = torch.constant.int 1
    // [7,1600] x [1600,6400] + bias %443 (alpha = 1).
    %3802 = torch.aten.mm %3801, %442 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3803 = torch.aten.add.Tensor %3802, %443, %int1_3016 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3804 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3017 = torch.constant.int 1
    %int7_3018 = torch.constant.int 7
    %int6400_3019 = torch.constant.int 6400
    %3805 = torch.prim.ListConstruct %int1_3017, %int7_3018, %int6400_3019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,6400] -> [1,7,6400].
    %3806 = torch.aten.reshape %3803, %3805 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %3806, spelled out op by op:
    //   %3819 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // i.e. the tanh form of GELU.
    %3807 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %3808 = torch.aten.mul.Tensor %3806, %3807 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3809 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %3810 = torch.aten.pow.Tensor_Tensor %3806, %3809 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3811 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %3812 = torch.aten.mul.Tensor %3810, %3811 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3020 = torch.constant.int 1
    // x + 0.044715 * x^3 (alpha = 1)
    %3813 = torch.aten.add.Tensor %3806, %3812, %int1_3020 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is approximately sqrt(2/pi).
    %3814 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3815 = torch.aten.mul.Tensor %3813, %3814 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3816 = torch.aten.tanh %3815 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3817 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3021 = torch.constant.int 1
    // 1 + tanh(...)
    %3818 = torch.aten.add.Tensor %3816, %3817, %int1_3021 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %3819 = torch.aten.mul.Tensor %3808, %3818 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 6400 -> 1600 linear projection of %3819, then residual add with %3797.
    // %3820, %int0_3024 and %3825 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %3820 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3022 = torch.constant.int 7
    %int6400_3023 = torch.constant.int 6400
    %3821 = torch.prim.ListConstruct %int7_3022, %int6400_3023 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,6400] for mm.
    %3822 = torch.aten.reshape %3819, %3821 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3024 = torch.constant.int 0
    %int1_3025 = torch.constant.int 1
    // [7,6400] x [6400,1600] + bias %445 (alpha = 1).
    %3823 = torch.aten.mm %3822, %444 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3824 = torch.aten.add.Tensor %3823, %445, %int1_3025 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3825 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3026 = torch.constant.int 1
    %int7_3027 = torch.constant.int 7
    %int1600_3028 = torch.constant.int 1600
    %3826 = torch.prim.ListConstruct %int1_3026, %int7_3027, %int1600_3028 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to [1,7,1600].
    %3827 = torch.aten.reshape %3824, %3826 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3029 = torch.constant.int 1
    // Residual connection: %3828 = %3797 + %3827.
    %3828 = torch.aten.add.Tensor %3797, %3827, %int1_3029 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3828, a fused 1600 -> 4800 linear projection, and a split of the result into three 1600-wide chunks.
    // %3830, %int0_3037 and %3835 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    // eps is approximately 1e-5.
    %float9.999990e-06_3030 = torch.constant.float 9.9999997473787516E-6
    %int1600_3031 = torch.constant.int 1600
    %3829 = torch.prim.ListConstruct %int1600_3031 : (!torch.int) -> !torch.list<int>
    // Normalize over the trailing dimension of size 1600; %446 / %447 are in the weight / bias positions.
    // Only %result0_3032 is consumed in the visible part of the function.
    %result0_3032, %result1_3033, %result2_3034 = torch.aten.native_layer_norm %3828, %3829, %446, %447, %float9.999990e-06_3030 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %3830 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3035 = torch.constant.int 7
    %int1600_3036 = torch.constant.int 1600
    %3831 = torch.prim.ListConstruct %int7_3035, %int1600_3036 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3832 = torch.aten.reshape %result0_3032, %3831 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3037 = torch.constant.int 0
    %int1_3038 = torch.constant.int 1
    // [7,1600] x [1600,4800] + bias %449 (alpha = 1).
    %3833 = torch.aten.mm %3832, %448 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3834 = torch.aten.add.Tensor %3833, %449, %int1_3038 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3835 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3039 = torch.constant.int 1
    %int7_3040 = torch.constant.int 7
    %int4800_3041 = torch.constant.int 4800
    %3836 = torch.prim.ListConstruct %int1_3039, %int7_3040, %int4800_3041 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3837 = torch.aten.reshape %3834, %3836 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Splat literal: three split sizes, each 1600.
    %3838 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3839 = torch.prim.tolist(%3838) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3042 = torch.constant.int 2
    // Split along dim 2 into three [1,7,1600] tensors. Later ops use #0 as the left operand of the
    // score matmul, #1 (transposed) as its right operand, and #2 as the operand weighted by the softmax output.
    %3840 = torch.aten.split_with_sizes %3837, %3839, %int2_3042 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3841:3 = torch.prim.ListUnpack %3840 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each of the three split chunks from [1,7,1600] to [1,7,25,64] and swap dims 1 and 2,
    // giving [1,25,7,64] (25 groups of width 64; 25 * 64 = 1600).
    // The shape literals %3842, %3846 and %3850 have no uses in the visible part of the function
    // (possibly importer leftovers - TODO confirm); the reshapes take their shape from the ListConstruct values.
    %3842 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3043 = torch.constant.int 1
    %int7_3044 = torch.constant.int 7
    %int25_3045 = torch.constant.int 25
    %int64_3046 = torch.constant.int 64
    %3843 = torch.prim.ListConstruct %int1_3043, %int7_3044, %int25_3045, %int64_3046 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #0 -> %3845.
    %3844 = torch.aten.reshape %3841#0, %3843 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3047 = torch.constant.int 1
    %int2_3048 = torch.constant.int 2
    %3845 = torch.aten.transpose.int %3844, %int1_3047, %int2_3048 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %3846 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3049 = torch.constant.int 1
    %int7_3050 = torch.constant.int 7
    %int25_3051 = torch.constant.int 25
    %int64_3052 = torch.constant.int 64
    %3847 = torch.prim.ListConstruct %int1_3049, %int7_3050, %int25_3051, %int64_3052 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #1 -> %3849. NOTE(review): %3849 has no use in the visible part of the function;
    // its shape matches the function's [1,25,7,64] results, so it may be returned - confirm.
    %3848 = torch.aten.reshape %3841#1, %3847 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3053 = torch.constant.int 1
    %int2_3054 = torch.constant.int 2
    %3849 = torch.aten.transpose.int %3848, %int1_3053, %int2_3054 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %3850 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3055 = torch.constant.int 1
    %int7_3056 = torch.constant.int 7
    %int25_3057 = torch.constant.int 25
    %int64_3058 = torch.constant.int 64
    %3851 = torch.prim.ListConstruct %int1_3055, %int7_3056, %int25_3057, %int64_3058 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Chunk #2 -> %3853.
    %3852 = torch.aten.reshape %3841#2, %3851 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3059 = torch.constant.int 1
    %int2_3060 = torch.constant.int 2
    %3853 = torch.aten.transpose.int %3852, %int1_3059, %int2_3060 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %3845 x transpose(%3848-derived tensor), divided by 64^0.5, masked,
    // softmax over the last dim, then multiplied with %3853.
    // %3860 and %int0_3065 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %int1_3061 = torch.constant.int 1
    %int2_3062 = torch.constant.int 2
    // Same operand (%3848) and same dims (1, 2) as the transpose that produced %3849.
    %3854 = torch.aten.transpose.int %3848, %int1_3061, %int2_3062 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3063 = torch.constant.int 2
    %int3_3064 = torch.constant.int 3
    // Swap the last two dims so the matmul contracts over the width-64 dim.
    %3855 = torch.aten.transpose.int %3854, %int2_3063, %int3_3064 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %3856 = torch.aten.matmul %3845, %3855 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64^0.5, computed at runtime as pow(64, 0.5) and then added to a zero scalar created by aten.full.
    %3857 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3858 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3859 = torch.aten.pow.Tensor_Tensor %3857, %3858 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    %3860 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3065 = torch.constant.int 0
    // Empty size list -> rank-0 tensor.
    %3861 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3066 = torch.constant.none
    %float0.000000e00_3067 = torch.constant.float 0.000000e+00
    %3862 = torch.aten.full %3861, %float0.000000e00_3067, %none_3066, %none_3066, %none_3066, %none_3066 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3068 = torch.constant.int 1
    %3863 = torch.aten.add.Tensor %3862, %3859, %int1_3068 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3864 = torch.aten.div.Tensor %3856, %3863 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump (presumably the causal mask - TODO confirm).
    %3865 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_3069 = torch.constant.int 6
    %none_3070 = torch.constant.none
    %false_3071 = torch.constant.bool false
    // Cast with dtype code 6; operand and result types are both f32.
    %3866 = torch.aten.to.dtype %3864, %int6_3069, %false_3071, %false_3071, %none_3070 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value for masked-out positions: the most negative finite f32.
    %3867 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    // Keep the score where the mask is true, otherwise use the fill value (mask broadcasts over dim 1).
    %3868 = torch.aten.where.self %3865, %3866, %3867 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_3072 = torch.constant.int 3
    %none_3073 = torch.constant.none
    // Softmax over dim 3.
    %3869 = torch.aten.softmax.int %3868, %int3_3072, %none_3073 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_3074 = torch.constant.int 6
    %none_3075 = torch.constant.none
    %false_3076 = torch.constant.bool false
    // Second cast with dtype code 6; again f32 -> f32.
    %3870 = torch.aten.to.dtype %3869, %int6_3074, %false_3076, %false_3076, %none_3075 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %3871 = torch.aten.matmul %3870, %3853 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25 x 64 groups of %3871 back into a 1600-wide tensor, apply a 1600 -> 1600 linear
    // projection, and add the residual %3828.
    // %3873, %3876, %int0_3084 and %3881 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %int1_3077 = torch.constant.int 1
    %int2_3078 = torch.constant.int 2
    // [1,25,7,64] -> [1,7,25,64].
    %3872 = torch.aten.transpose.int %3871, %int1_3077, %int2_3078 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3873 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3079 = torch.constant.int 1
    %int7_3080 = torch.constant.int 7
    %int1600_3081 = torch.constant.int 1600
    %3874 = torch.prim.ListConstruct %int1_3079, %int7_3080, %int1600_3081 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [1,7,25,64] -> [1,7,1600].
    %3875 = torch.aten.reshape %3872, %3874 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %3876 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3082 = torch.constant.int 7
    %int1600_3083 = torch.constant.int 1600
    %3877 = torch.prim.ListConstruct %int7_3082, %int1600_3083 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3878 = torch.aten.reshape %3875, %3877 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3084 = torch.constant.int 0
    %int1_3085 = torch.constant.int 1
    // [7,1600] x [1600,1600] + bias %451 (alpha = 1).
    %3879 = torch.aten.mm %3878, %450 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3880 = torch.aten.add.Tensor %3879, %451, %int1_3085 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3881 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3086 = torch.constant.int 1
    %int7_3087 = torch.constant.int 7
    %int1600_3088 = torch.constant.int 1600
    %3882 = torch.prim.ListConstruct %int1_3086, %int7_3087, %int1600_3088 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3883 = torch.aten.reshape %3880, %3882 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3089 = torch.constant.int 1
    // Residual connection: %3884 = %3883 + %3828.
    %3884 = torch.aten.add.Tensor %3883, %3828, %int1_3089 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm of %3884 followed by a 1600 -> 6400 linear projection.
    // %3886, %int0_3097 and %3891 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    // eps is approximately 1e-5.
    %float9.999990e-06_3090 = torch.constant.float 9.9999997473787516E-6
    %int1600_3091 = torch.constant.int 1600
    %3885 = torch.prim.ListConstruct %int1600_3091 : (!torch.int) -> !torch.list<int>
    // Normalize over the trailing dimension of size 1600; %452 / %453 are in the weight / bias positions.
    // Only %result0_3092 is consumed in the visible part of the function.
    %result0_3092, %result1_3093, %result2_3094 = torch.aten.native_layer_norm %3884, %3885, %452, %453, %float9.999990e-06_3090 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    %3886 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3095 = torch.constant.int 7
    %int1600_3096 = torch.constant.int 1600
    %3887 = torch.prim.ListConstruct %int7_3095, %int1600_3096 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,1600] for mm.
    %3888 = torch.aten.reshape %result0_3092, %3887 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3097 = torch.constant.int 0
    %int1_3098 = torch.constant.int 1
    // [7,1600] x [1600,6400] + bias %455 (alpha = 1).
    %3889 = torch.aten.mm %3888, %454 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3890 = torch.aten.add.Tensor %3889, %455, %int1_3098 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3891 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3099 = torch.constant.int 1
    %int7_3100 = torch.constant.int 7
    %int6400_3101 = torch.constant.int 6400
    %3892 = torch.prim.ListConstruct %int1_3099, %int7_3100, %int6400_3101 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,6400] -> [1,7,6400].
    %3893 = torch.aten.reshape %3890, %3892 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Element-wise activation on x = %3893, spelled out op by op:
    //   %3906 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // i.e. the tanh form of GELU.
    %3894 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.5 * x
    %3895 = torch.aten.mul.Tensor %3893, %3894 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3896 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    // x^3
    %3897 = torch.aten.pow.Tensor_Tensor %3893, %3896 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3898 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    // 0.044715 * x^3
    %3899 = torch.aten.mul.Tensor %3897, %3898 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3102 = torch.constant.int 1
    // x + 0.044715 * x^3 (alpha = 1)
    %3900 = torch.aten.add.Tensor %3893, %3899, %int1_3102 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is approximately sqrt(2/pi).
    %3901 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3902 = torch.aten.mul.Tensor %3900, %3901 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3903 = torch.aten.tanh %3902 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3904 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3103 = torch.constant.int 1
    // 1 + tanh(...)
    %3905 = torch.aten.add.Tensor %3903, %3904, %int1_3103 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %3906 = torch.aten.mul.Tensor %3895, %3905 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 6400 -> 1600 linear projection of %3906, then residual add with %3884.
    // %3907, %int0_3106 and %3912 have no uses in the visible part of the function (possibly importer leftovers - TODO confirm).
    %3907 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3104 = torch.constant.int 7
    %int6400_3105 = torch.constant.int 6400
    %3908 = torch.prim.ListConstruct %int7_3104, %int6400_3105 : (!torch.int, !torch.int) -> !torch.list<int>
    // Collapse to 2-D [7,6400] for mm.
    %3909 = torch.aten.reshape %3906, %3908 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3106 = torch.constant.int 0
    %int1_3107 = torch.constant.int 1
    // [7,6400] x [6400,1600] + bias %457 (alpha = 1).
    %3910 = torch.aten.mm %3909, %456 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3911 = torch.aten.add.Tensor %3910, %457, %int1_3107 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3912 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3108 = torch.constant.int 1
    %int7_3109 = torch.constant.int 7
    %int1600_3110 = torch.constant.int 1600
    %3913 = torch.prim.ListConstruct %int1_3108, %int7_3109, %int1600_3110 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Back to [1,7,1600].
    %3914 = torch.aten.reshape %3911, %3913 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3111 = torch.constant.int 1
    // Residual connection: %3915 = %3884 + %3914.
    %3915 = torch.aten.add.Tensor %3884, %3914, %int1_3111 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalizes %3915 over its last dimension (size 1600) with weight %458, bias %459
    // and eps 9.9999997473787516E-6, then applies a 1600 -> 4800 projection (%460, bias %461)
    // and splits the result along dim 2 into three [1,7,1600] tensors (%3928#0..#2).
    // %458..%461 are defined above this excerpt.
    // Only the first layer-norm result (%result0_3114) is used in the visible excerpt.
    %float9.999990e-06_3112 = torch.constant.float 9.9999997473787516E-6
    %int1600_3113 = torch.constant.int 1600
    %3916 = torch.prim.ListConstruct %int1600_3113 : (!torch.int) -> !torch.list<int>
    %result0_3114, %result1_3115, %result2_3116 = torch.aten.native_layer_norm %3915, %3916, %458, %459, %float9.999990e-06_3112 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): %3917, %int0_3119 and %3922 are not referenced in the visible excerpt.
    %3917 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3117 = torch.constant.int 7
    %int1600_3118 = torch.constant.int 1600
    %3918 = torch.prim.ListConstruct %int7_3117, %int1600_3118 : (!torch.int, !torch.int) -> !torch.list<int>
    %3919 = torch.aten.reshape %result0_3114, %3918 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3119 = torch.constant.int 0
    %int1_3120 = torch.constant.int 1
    %3920 = torch.aten.mm %3919, %460 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %3921 = torch.aten.add.Tensor %3920, %461, %int1_3120 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %3922 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3121 = torch.constant.int 1
    %int7_3122 = torch.constant.int 7
    %int4800_3123 = torch.constant.int 4800
    %3923 = torch.prim.ListConstruct %int1_3121, %int7_3122, %int4800_3123 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3924 = torch.aten.reshape %3921, %3923 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes are a splat literal: three entries, each 1600.
    %3925 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %3926 = torch.prim.tolist(%3925) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3124 = torch.constant.int 2
    %3927 = torch.aten.split_with_sizes %3924, %3926, %int2_3124 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %3928:3 = torch.prim.ListUnpack %3927 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshapes each of the three [1,7,1600] chunks to [1,7,25,64] and swaps dims 1 and 2,
    // giving %3932, %3936 and %3940 of shape [1,25,7,64].
    // NOTE(review): presumably query (#0), key (#1) and value (#2) with 25 heads of width 64;
    // the naming is inferred from how the values are consumed below.
    // NOTE(review): the shape literals %3929, %3933 and %3937 are not referenced in the visible excerpt.
    %3929 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3125 = torch.constant.int 1
    %int7_3126 = torch.constant.int 7
    %int25_3127 = torch.constant.int 25
    %int64_3128 = torch.constant.int 64
    %3930 = torch.prim.ListConstruct %int1_3125, %int7_3126, %int25_3127, %int64_3128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3931 = torch.aten.reshape %3928#0, %3930 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3129 = torch.constant.int 1
    %int2_3130 = torch.constant.int 2
    %3932 = torch.aten.transpose.int %3931, %int1_3129, %int2_3130 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second chunk (%3928#1).
    %3933 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3131 = torch.constant.int 1
    %int7_3132 = torch.constant.int 7
    %int25_3133 = torch.constant.int 25
    %int64_3134 = torch.constant.int 64
    %3934 = torch.prim.ListConstruct %int1_3131, %int7_3132, %int25_3133, %int64_3134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3935 = torch.aten.reshape %3928#1, %3934 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3135 = torch.constant.int 1
    %int2_3136 = torch.constant.int 2
    %3936 = torch.aten.transpose.int %3935, %int1_3135, %int2_3136 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third chunk (%3928#2).
    %3937 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3137 = torch.constant.int 1
    %int7_3138 = torch.constant.int 7
    %int25_3139 = torch.constant.int 25
    %int64_3140 = torch.constant.int 64
    %3938 = torch.prim.ListConstruct %int1_3137, %int7_3138, %int25_3139, %int64_3140 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3939 = torch.aten.reshape %3928#2, %3938 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3141 = torch.constant.int 1
    %int2_3142 = torch.constant.int 2
    %3940 = torch.aten.transpose.int %3939, %int1_3141, %int2_3142 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled, masked softmax attention over the [1,25,7,64] tensors produced above:
    //   scores = %3932 x transpose(%3941, 2, 3)            -> [1,25,7,7]
    //   scores = scores / (0 + 64^0.5)
    //   scores = where(%3952, scores, -3.40282347E+38)
    //   %3958  = softmax(scores, dim=3) x %3940             -> [1,25,7,64]
    %int1_3143 = torch.constant.int 1
    %int2_3144 = torch.constant.int 2
    // Same operands as the transpose that produced %3936 (%3935, dims 1 and 2); recomputed here.
    %3941 = torch.aten.transpose.int %3935, %int1_3143, %int2_3144 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3145 = torch.constant.int 2
    %int3_3146 = torch.constant.int 3
    %3942 = torch.aten.transpose.int %3941, %int2_3145, %int3_3146 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %3943 = torch.aten.matmul %3932, %3942 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64^0.5 added to a rank-0 tensor filled with 0.0.
    %3944 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3945 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3946 = torch.aten.pow.Tensor_Tensor %3944, %3945 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %3947 and %int0_3147 are not referenced in the visible excerpt.
    %3947 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3147 = torch.constant.int 0
    %3948 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3148 = torch.constant.none
    %float0.000000e00_3149 = torch.constant.float 0.000000e+00
    %3949 = torch.aten.full %3948, %float0.000000e00_3149, %none_3148, %none_3148, %none_3148, %none_3148 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3150 = torch.constant.int 1
    %3950 = torch.aten.add.Tensor %3949, %3946, %int1_3150 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %3951 = torch.aten.div.Tensor %3943, %3950 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask -- cannot be confirmed from here.
    %3952 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_3151 = torch.constant.int 6
    %none_3152 = torch.constant.none
    %false_3153 = torch.constant.bool false
    // Operand and result are both f32, so this cast leaves the element type unchanged.
    %3953 = torch.aten.to.dtype %3951, %int6_3151, %false_3153, %false_3153, %none_3152 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value used where the mask is false.
    %3954 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %3955 = torch.aten.where.self %3952, %3953, %3954 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_3154 = torch.constant.int 3
    %none_3155 = torch.constant.none
    %3956 = torch.aten.softmax.int %3955, %int3_3154, %none_3155 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_3156 = torch.constant.int 6
    %none_3157 = torch.constant.none
    %false_3158 = torch.constant.bool false
    %3957 = torch.aten.to.dtype %3956, %int6_3156, %false_3158, %false_3158, %none_3157 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %3958 = torch.aten.matmul %3957, %3940 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merges the 25x64 head layout of %3958 back into width 1600, applies a 1600 -> 1600
    // projection (%462, bias %463) and adds %3915:
    //   transpose(1,2) -> [1,7,25,64] -> reshape [1,7,1600] -> [7,1600] -> mm -> + bias -> [1,7,1600] -> + %3915.
    // %462 and %463 are defined above this excerpt.
    // NOTE(review): %3960, %3963, %int0_3166 and %3968 are not referenced in the visible excerpt.
    %int1_3159 = torch.constant.int 1
    %int2_3160 = torch.constant.int 2
    %3959 = torch.aten.transpose.int %3958, %int1_3159, %int2_3160 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %3960 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3161 = torch.constant.int 1
    %int7_3162 = torch.constant.int 7
    %int1600_3163 = torch.constant.int 1600
    %3961 = torch.prim.ListConstruct %int1_3161, %int7_3162, %int1600_3163 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3962 = torch.aten.reshape %3959, %3961 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %3963 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3164 = torch.constant.int 7
    %int1600_3165 = torch.constant.int 1600
    %3964 = torch.prim.ListConstruct %int7_3164, %int1600_3165 : (!torch.int, !torch.int) -> !torch.list<int>
    %3965 = torch.aten.reshape %3962, %3964 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3166 = torch.constant.int 0
    %int1_3167 = torch.constant.int 1
    %3966 = torch.aten.mm %3965, %462 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3967 = torch.aten.add.Tensor %3966, %463, %int1_3167 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3968 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3168 = torch.constant.int 1
    %int7_3169 = torch.constant.int 7
    %int1600_3170 = torch.constant.int 1600
    %3969 = torch.prim.ListConstruct %int1_3168, %int7_3169, %int1600_3170 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3970 = torch.aten.reshape %3967, %3969 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3171 = torch.constant.int 1
    // Residual add with %3915, the tensor that was layer-normalized before the 4800-wide projection.
    %3971 = torch.aten.add.Tensor %3970, %3915, %int1_3171 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalizes %3971 over its last dimension (size 1600) with weight %464, bias %465
    // and eps 9.9999997473787516E-6, then expands 1600 -> 6400 (%466, bias %467) to give %3980 ([1,7,6400]).
    // %464..%467 are defined above this excerpt.
    // Only %result0_3174 of the three layer-norm results is used in the visible excerpt.
    %float9.999990e-06_3172 = torch.constant.float 9.9999997473787516E-6
    %int1600_3173 = torch.constant.int 1600
    %3972 = torch.prim.ListConstruct %int1600_3173 : (!torch.int) -> !torch.list<int>
    %result0_3174, %result1_3175, %result2_3176 = torch.aten.native_layer_norm %3971, %3972, %464, %465, %float9.999990e-06_3172 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): %3973, %int0_3179 and %3978 are not referenced in the visible excerpt.
    %3973 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3177 = torch.constant.int 7
    %int1600_3178 = torch.constant.int 1600
    %3974 = torch.prim.ListConstruct %int7_3177, %int1600_3178 : (!torch.int, !torch.int) -> !torch.list<int>
    %3975 = torch.aten.reshape %result0_3174, %3974 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3179 = torch.constant.int 0
    %int1_3180 = torch.constant.int 1
    %3976 = torch.aten.mm %3975, %466 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %3977 = torch.aten.add.Tensor %3976, %467, %int1_3180 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %3978 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3181 = torch.constant.int 1
    %int7_3182 = torch.constant.int 7
    %int6400_3183 = torch.constant.int 6400
    %3979 = torch.prim.ListConstruct %int1_3181, %int7_3182, %int6400_3183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3980 = torch.aten.reshape %3977, %3979 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-approximated GELU applied elementwise to x = %3980 ([1,7,6400]):
    //   %3993 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // 0.5 * x
    %3981 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %3982 = torch.aten.mul.Tensor %3980, %3981 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %3983 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %3984 = torch.aten.pow.Tensor_Tensor %3980, %3983 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3985 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %3986 = torch.aten.mul.Tensor %3984, %3985 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3184 = torch.constant.int 1
    %3987 = torch.aten.add.Tensor %3980, %3986, %int1_3184 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is sqrt(2/pi) rounded to f32 precision.
    %3988 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %3989 = torch.aten.mul.Tensor %3987, %3988 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3990 = torch.aten.tanh %3989 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %3991 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3185 = torch.constant.int 1
    %3992 = torch.aten.add.Tensor %3990, %3991, %int1_3185 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %3993 = torch.aten.mul.Tensor %3982, %3992 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Projects %3993 from width 6400 to 1600 (%468, bias %469) and adds the result to %3971:
    //   flatten to [7,6400] -> mm -> + bias -> reshape to [1,7,1600] -> %3971 + result = %4002.
    // %468 and %469 are defined above this excerpt.
    // NOTE(review): %3994, %int0_3188 and %3999 are not referenced in the visible excerpt.
    %3994 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3186 = torch.constant.int 7
    %int6400_3187 = torch.constant.int 6400
    %3995 = torch.prim.ListConstruct %int7_3186, %int6400_3187 : (!torch.int, !torch.int) -> !torch.list<int>
    %3996 = torch.aten.reshape %3993, %3995 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3188 = torch.constant.int 0
    %int1_3189 = torch.constant.int 1
    %3997 = torch.aten.mm %3996, %468 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %3998 = torch.aten.add.Tensor %3997, %469, %int1_3189 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %3999 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3190 = torch.constant.int 1
    %int7_3191 = torch.constant.int 7
    %int1600_3192 = torch.constant.int 1600
    %4000 = torch.prim.ListConstruct %int1_3190, %int7_3191, %int1600_3192 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4001 = torch.aten.reshape %3998, %4000 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3193 = torch.constant.int 1
    // Residual add; %4002 feeds the next layer norm and a later residual add.
    %4002 = torch.aten.add.Tensor %3971, %4001, %int1_3193 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalizes %4002 over its last dimension (size 1600) with weight %470, bias %471
    // and eps 9.9999997473787516E-6, then applies a 1600 -> 4800 projection (%472, bias %473)
    // and splits the result along dim 2 into three [1,7,1600] tensors (%4015#0..#2).
    // %470..%473 are defined above this excerpt.
    // Only %result0_3196 of the three layer-norm results is used in the visible excerpt.
    %float9.999990e-06_3194 = torch.constant.float 9.9999997473787516E-6
    %int1600_3195 = torch.constant.int 1600
    %4003 = torch.prim.ListConstruct %int1600_3195 : (!torch.int) -> !torch.list<int>
    %result0_3196, %result1_3197, %result2_3198 = torch.aten.native_layer_norm %4002, %4003, %470, %471, %float9.999990e-06_3194 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): %4004, %int0_3201 and %4009 are not referenced in the visible excerpt.
    %4004 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3199 = torch.constant.int 7
    %int1600_3200 = torch.constant.int 1600
    %4005 = torch.prim.ListConstruct %int7_3199, %int1600_3200 : (!torch.int, !torch.int) -> !torch.list<int>
    %4006 = torch.aten.reshape %result0_3196, %4005 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3201 = torch.constant.int 0
    %int1_3202 = torch.constant.int 1
    %4007 = torch.aten.mm %4006, %472 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4008 = torch.aten.add.Tensor %4007, %473, %int1_3202 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4009 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3203 = torch.constant.int 1
    %int7_3204 = torch.constant.int 7
    %int4800_3205 = torch.constant.int 4800
    %4010 = torch.prim.ListConstruct %int1_3203, %int7_3204, %int4800_3205 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4011 = torch.aten.reshape %4008, %4010 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes are a splat literal: three entries, each 1600.
    %4012 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4013 = torch.prim.tolist(%4012) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3206 = torch.constant.int 2
    %4014 = torch.aten.split_with_sizes %4011, %4013, %int2_3206 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4015:3 = torch.prim.ListUnpack %4014 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshapes each of the three [1,7,1600] chunks of %4015 to [1,7,25,64] and swaps dims 1 and 2,
    // giving %4019, %4023 and %4027 of shape [1,25,7,64].
    // NOTE(review): presumably query (#0), key (#1) and value (#2) with 25 heads of width 64;
    // the naming is inferred from how the values are consumed below.
    // NOTE(review): the shape literals %4016, %4020 and %4024 are not referenced in the visible excerpt.
    %4016 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3207 = torch.constant.int 1
    %int7_3208 = torch.constant.int 7
    %int25_3209 = torch.constant.int 25
    %int64_3210 = torch.constant.int 64
    %4017 = torch.prim.ListConstruct %int1_3207, %int7_3208, %int25_3209, %int64_3210 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4018 = torch.aten.reshape %4015#0, %4017 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3211 = torch.constant.int 1
    %int2_3212 = torch.constant.int 2
    %4019 = torch.aten.transpose.int %4018, %int1_3211, %int2_3212 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second chunk (%4015#1).
    %4020 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3213 = torch.constant.int 1
    %int7_3214 = torch.constant.int 7
    %int25_3215 = torch.constant.int 25
    %int64_3216 = torch.constant.int 64
    %4021 = torch.prim.ListConstruct %int1_3213, %int7_3214, %int25_3215, %int64_3216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4022 = torch.aten.reshape %4015#1, %4021 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3217 = torch.constant.int 1
    %int2_3218 = torch.constant.int 2
    %4023 = torch.aten.transpose.int %4022, %int1_3217, %int2_3218 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third chunk (%4015#2).
    %4024 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3219 = torch.constant.int 1
    %int7_3220 = torch.constant.int 7
    %int25_3221 = torch.constant.int 25
    %int64_3222 = torch.constant.int 64
    %4025 = torch.prim.ListConstruct %int1_3219, %int7_3220, %int25_3221, %int64_3222 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4026 = torch.aten.reshape %4015#2, %4025 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3223 = torch.constant.int 1
    %int2_3224 = torch.constant.int 2
    %4027 = torch.aten.transpose.int %4026, %int1_3223, %int2_3224 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Scaled, masked softmax attention over the [1,25,7,64] tensors produced above:
    //   scores = %4019 x transpose(%4028, 2, 3)            -> [1,25,7,7]
    //   scores = scores / (0 + 64^0.5)
    //   scores = where(%4039, scores, -3.40282347E+38)
    //   %4045  = softmax(scores, dim=3) x %4027             -> [1,25,7,64]
    %int1_3225 = torch.constant.int 1
    %int2_3226 = torch.constant.int 2
    // Same operands as the transpose that produced %4023 (%4022, dims 1 and 2); recomputed here.
    %4028 = torch.aten.transpose.int %4022, %int1_3225, %int2_3226 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3227 = torch.constant.int 2
    %int3_3228 = torch.constant.int 3
    %4029 = torch.aten.transpose.int %4028, %int2_3227, %int3_3228 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %4030 = torch.aten.matmul %4019, %4029 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64^0.5 added to a rank-0 tensor filled with 0.0.
    %4031 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4032 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4033 = torch.aten.pow.Tensor_Tensor %4031, %4032 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE(review): %4034 and %int0_3229 are not referenced in the visible excerpt.
    %4034 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3229 = torch.constant.int 0
    %4035 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3230 = torch.constant.none
    %float0.000000e00_3231 = torch.constant.float 0.000000e+00
    %4036 = torch.aten.full %4035, %float0.000000e00_3231, %none_3230, %none_3230, %none_3230, %none_3230 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3232 = torch.constant.int 1
    %4037 = torch.aten.add.Tensor %4036, %4033, %int1_3232 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %4038 = torch.aten.div.Tensor %4030, %4037 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask -- cannot be confirmed from here.
    %4039 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    %int6_3233 = torch.constant.int 6
    %none_3234 = torch.constant.none
    %false_3235 = torch.constant.bool false
    // Operand and result are both f32, so this cast leaves the element type unchanged.
    %4040 = torch.aten.to.dtype %4038, %int6_3233, %false_3235, %false_3235, %none_3234 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Fill value used where the mask is false.
    %4041 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4042 = torch.aten.where.self %4039, %4040, %4041 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    %int3_3236 = torch.constant.int 3
    %none_3237 = torch.constant.none
    %4043 = torch.aten.softmax.int %4042, %int3_3236, %none_3237 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %int6_3238 = torch.constant.int 6
    %none_3239 = torch.constant.none
    %false_3240 = torch.constant.bool false
    %4044 = torch.aten.to.dtype %4043, %int6_3238, %false_3240, %false_3240, %none_3239 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %4045 = torch.aten.matmul %4044, %4027 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merges the 25x64 head layout of %4045 back into width 1600, applies a 1600 -> 1600
    // projection (%474, bias %475) and adds %4002:
    //   transpose(1,2) -> [1,7,25,64] -> reshape [1,7,1600] -> [7,1600] -> mm -> + bias -> [1,7,1600] -> + %4002.
    // %474 and %475 are defined above this excerpt.
    // NOTE(review): %4047, %4050, %int0_3248 and %4055 are not referenced in the visible excerpt.
    %int1_3241 = torch.constant.int 1
    %int2_3242 = torch.constant.int 2
    %4046 = torch.aten.transpose.int %4045, %int1_3241, %int2_3242 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    %4047 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3243 = torch.constant.int 1
    %int7_3244 = torch.constant.int 7
    %int1600_3245 = torch.constant.int 1600
    %4048 = torch.prim.ListConstruct %int1_3243, %int7_3244, %int1600_3245 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4049 = torch.aten.reshape %4046, %4048 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %4050 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3246 = torch.constant.int 7
    %int1600_3247 = torch.constant.int 1600
    %4051 = torch.prim.ListConstruct %int7_3246, %int1600_3247 : (!torch.int, !torch.int) -> !torch.list<int>
    %4052 = torch.aten.reshape %4049, %4051 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3248 = torch.constant.int 0
    %int1_3249 = torch.constant.int 1
    %4053 = torch.aten.mm %4052, %474 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4054 = torch.aten.add.Tensor %4053, %475, %int1_3249 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4055 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3250 = torch.constant.int 1
    %int7_3251 = torch.constant.int 7
    %int1600_3252 = torch.constant.int 1600
    %4056 = torch.prim.ListConstruct %int1_3250, %int7_3251, %int1600_3252 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4057 = torch.aten.reshape %4054, %4056 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3253 = torch.constant.int 1
    // Residual add with %4002, the tensor that was layer-normalized before the 4800-wide projection.
    %4058 = torch.aten.add.Tensor %4057, %4002, %int1_3253 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalizes %4058 over its last dimension (size 1600) with weight %476, bias %477
    // and eps 9.9999997473787516E-6, then expands 1600 -> 6400 (%478, bias %479) to give %4067 ([1,7,6400]).
    // %476..%479 are defined above this excerpt.
    // Only %result0_3256 of the three layer-norm results is used in the visible excerpt.
    %float9.999990e-06_3254 = torch.constant.float 9.9999997473787516E-6
    %int1600_3255 = torch.constant.int 1600
    %4059 = torch.prim.ListConstruct %int1600_3255 : (!torch.int) -> !torch.list<int>
    %result0_3256, %result1_3257, %result2_3258 = torch.aten.native_layer_norm %4058, %4059, %476, %477, %float9.999990e-06_3254 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): %4060, %int0_3261 and %4065 are not referenced in the visible excerpt.
    %4060 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3259 = torch.constant.int 7
    %int1600_3260 = torch.constant.int 1600
    %4061 = torch.prim.ListConstruct %int7_3259, %int1600_3260 : (!torch.int, !torch.int) -> !torch.list<int>
    %4062 = torch.aten.reshape %result0_3256, %4061 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3261 = torch.constant.int 0
    %int1_3262 = torch.constant.int 1
    %4063 = torch.aten.mm %4062, %478 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4064 = torch.aten.add.Tensor %4063, %479, %int1_3262 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %4065 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3263 = torch.constant.int 1
    %int7_3264 = torch.constant.int 7
    %int6400_3265 = torch.constant.int 6400
    %4066 = torch.prim.ListConstruct %int1_3263, %int7_3264, %int6400_3265 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4067 = torch.aten.reshape %4064, %4066 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-approximated GELU applied elementwise to x = %4067 ([1,7,6400]):
    //   %4080 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // 0.5 * x
    %4068 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4069 = torch.aten.mul.Tensor %4067, %4068 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %4070 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4071 = torch.aten.pow.Tensor_Tensor %4067, %4070 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4072 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4073 = torch.aten.mul.Tensor %4071, %4072 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3266 = torch.constant.int 1
    %4074 = torch.aten.add.Tensor %4067, %4073, %int1_3266 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is sqrt(2/pi) rounded to f32 precision.
    %4075 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4076 = torch.aten.mul.Tensor %4074, %4075 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4077 = torch.aten.tanh %4076 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4078 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3267 = torch.constant.int 1
    %4079 = torch.aten.add.Tensor %4077, %4078, %int1_3267 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %4080 = torch.aten.mul.Tensor %4069, %4079 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Projects %4080 from width 6400 to 1600 (%480, bias %481) and adds the result to %4058:
    //   flatten to [7,6400] -> mm -> + bias -> reshape to [1,7,1600] -> %4058 + result = %4089.
    // %480 and %481 are defined above this excerpt.
    // NOTE(review): %4081, %int0_3270 and %4086 are not referenced in the visible excerpt.
    %4081 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3268 = torch.constant.int 7
    %int6400_3269 = torch.constant.int 6400
    %4082 = torch.prim.ListConstruct %int7_3268, %int6400_3269 : (!torch.int, !torch.int) -> !torch.list<int>
    %4083 = torch.aten.reshape %4080, %4082 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3270 = torch.constant.int 0
    %int1_3271 = torch.constant.int 1
    %4084 = torch.aten.mm %4083, %480 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4085 = torch.aten.add.Tensor %4084, %481, %int1_3271 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4086 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3272 = torch.constant.int 1
    %int7_3273 = torch.constant.int 7
    %int1600_3274 = torch.constant.int 1600
    %4087 = torch.prim.ListConstruct %int1_3272, %int7_3273, %int1600_3274 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4088 = torch.aten.reshape %4085, %4087 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3275 = torch.constant.int 1
    // Residual add; %4089 feeds the next layer norm.
    %4089 = torch.aten.add.Tensor %4058, %4088, %int1_3275 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer-normalizes %4089 over its last dimension (size 1600) with weight %482, bias %483
    // and eps 9.9999997473787516E-6, then applies a 1600 -> 4800 projection (%484, bias %485)
    // and splits the result along dim 2 into three [1,7,1600] tensors (%4102#0..#2).
    // %482..%485 are defined above this excerpt.
    // Only %result0_3278 of the three layer-norm results is used in the visible excerpt.
    %float9.999990e-06_3276 = torch.constant.float 9.9999997473787516E-6
    %int1600_3277 = torch.constant.int 1600
    %4090 = torch.prim.ListConstruct %int1600_3277 : (!torch.int) -> !torch.list<int>
    %result0_3278, %result1_3279, %result2_3280 = torch.aten.native_layer_norm %4089, %4090, %482, %483, %float9.999990e-06_3276 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // NOTE(review): %4091, %int0_3283 and %4096 are not referenced in the visible excerpt.
    %4091 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3281 = torch.constant.int 7
    %int1600_3282 = torch.constant.int 1600
    %4092 = torch.prim.ListConstruct %int7_3281, %int1600_3282 : (!torch.int, !torch.int) -> !torch.list<int>
    %4093 = torch.aten.reshape %result0_3278, %4092 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    %int0_3283 = torch.constant.int 0
    %int1_3284 = torch.constant.int 1
    %4094 = torch.aten.mm %4093, %484 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4095 = torch.aten.add.Tensor %4094, %485, %int1_3284 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4096 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3285 = torch.constant.int 1
    %int7_3286 = torch.constant.int 7
    %int4800_3287 = torch.constant.int 4800
    %4097 = torch.prim.ListConstruct %int1_3285, %int7_3286, %int4800_3287 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4098 = torch.aten.reshape %4095, %4097 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes are a splat literal: three entries, each 1600.
    %4099 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4100 = torch.prim.tolist(%4099) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3288 = torch.constant.int 2
    %4101 = torch.aten.split_with_sizes %4098, %4100, %int2_3288 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4102:3 = torch.prim.ListUnpack %4101 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three [1,7,1600] chunks %4102#0..#2 is reshaped to [1,7,25,64]
    // (25 * 64 = 1600) and then has dims 1 and 2 swapped, giving [1,25,7,64].
    // The three [1, 7, 25, 64] shape literals (%4103, %4107, %4111) are not referenced in
    // this excerpt; each reshape takes its sizes from the ListConstruct that follows.

    // Chunk #0 -> %4106.
    %4103 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3289 = torch.constant.int 1
    %int7_3290 = torch.constant.int 7
    %int25_3291 = torch.constant.int 25
    %int64_3292 = torch.constant.int 64
    %4104 = torch.prim.ListConstruct %int1_3289, %int7_3290, %int25_3291, %int64_3292 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4105 = torch.aten.reshape %4102#0, %4104 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3293 = torch.constant.int 1
    %int2_3294 = torch.constant.int 2
    %4106 = torch.aten.transpose.int %4105, %int1_3293, %int2_3294 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> %4110.
    %4107 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3295 = torch.constant.int 1
    %int7_3296 = torch.constant.int 7
    %int25_3297 = torch.constant.int 25
    %int64_3298 = torch.constant.int 64
    %4108 = torch.prim.ListConstruct %int1_3295, %int7_3296, %int25_3297, %int64_3298 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4109 = torch.aten.reshape %4102#1, %4108 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3299 = torch.constant.int 1
    %int2_3300 = torch.constant.int 2
    %4110 = torch.aten.transpose.int %4109, %int1_3299, %int2_3300 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> %4114.
    %4111 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3301 = torch.constant.int 1
    %int7_3302 = torch.constant.int 7
    %int25_3303 = torch.constant.int 25
    %int64_3304 = torch.constant.int 64
    %4112 = torch.prim.ListConstruct %int1_3301, %int7_3302, %int25_3303, %int64_3304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4113 = torch.aten.reshape %4102#2, %4112 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3305 = torch.constant.int 1
    %int2_3306 = torch.constant.int 2
    %4114 = torch.aten.transpose.int %4113, %int1_3305, %int2_3306 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Computes softmax(where(mask, (%4106 x %4109^T) / 64^0.5, -3.40282347E+38)) over the
    // last dimension, producing the [1,25,7,7] tensor %4131.

    // Same transpose of %4109 (dims 1 and 2) as the one that produces %4110.
    %int1_3307 = torch.constant.int 1
    %int2_3308 = torch.constant.int 2
    %4115 = torch.aten.transpose.int %4109, %int1_3307, %int2_3308 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims -> [1,25,64,7] so the matmul contracts over the size-64 dim.
    %int2_3309 = torch.constant.int 2
    %int3_3310 = torch.constant.int 3
    %4116 = torch.aten.transpose.int %4115, %int2_3309, %int3_3310 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched product [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %4117 = torch.aten.matmul %4106, %4116 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64 ** 0.5 as a 0-d tensor (64 is the size of the contracted dimension).
    %4118 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4119 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4120 = torch.aten.pow.Tensor_Tensor %4118, %4119 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4121 (empty shape literal) and %int0_3311 are not referenced in this excerpt.
    %4121 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3311 = torch.constant.int 0
    // Build a 0-d tensor filled with 0.0 and add the divisor to it (0.0 + 64 ** 0.5).
    %4122 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3312 = torch.constant.none
    %float0.000000e00_3313 = torch.constant.float 0.000000e+00
    %4123 = torch.aten.full %4122, %float0.000000e00_3313, %none_3312, %none_3312, %none_3312, %none_3312 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3314 = torch.constant.int 1
    %4124 = torch.aten.add.Tensor %4123, %4120, %int1_3314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scale the [1,25,7,7] product by the 0-d divisor (broadcast).
    %4125 = torch.aten.div.Tensor %4117, %4124 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) attention mask -- confirm.
    %4126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32, so the element type
    // is unchanged here.
    %int6_3315 = torch.constant.int 6
    %none_3316 = torch.constant.none
    %false_3317 = torch.constant.bool false
    %4127 = torch.aten.to.dtype %4125, %int6_3315, %false_3317, %false_3317, %none_3316 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the scaled value where the mask is true; elsewhere substitute -3.40282347E+38.
    %4128 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4129 = torch.aten.where.self %4126, %4127, %4128 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension), dtype argument left as none.
    %int3_3318 = torch.constant.int 3
    %none_3319 = torch.constant.none
    %4130 = torch.aten.softmax.int %4129, %int3_3318, %none_3319 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype with dtype code 6; again f32 -> f32.
    %int6_3320 = torch.constant.int 6
    %none_3321 = torch.constant.none
    %false_3322 = torch.constant.bool false
    %4131 = torch.aten.to.dtype %4130, %int6_3320, %false_3322, %false_3322, %none_3321 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the [1,25,7,7] weights %4131 to %4114, merge the 25 x 64 dims back into 1600,
    // run a 1600 -> 1600 projection, and add %4089 to the result.

    // Batched product [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %4132 = torch.aten.matmul %4131, %4114 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back -> [1,7,25,64].
    %int1_3323 = torch.constant.int 1
    %int2_3324 = torch.constant.int 2
    %4133 = torch.aten.transpose.int %4132, %int1_3323, %int2_3324 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4134 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3325 = torch.constant.int 1
    %int7_3326 = torch.constant.int 7
    %int1600_3327 = torch.constant.int 1600
    %4135 = torch.prim.ListConstruct %int1_3325, %int7_3326, %int1600_3327 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing 25 x 64 dims -> [1,7,1600].
    %4136 = torch.aten.reshape %4133, %4135 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt.
    %4137 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3328 = torch.constant.int 7
    %int1600_3329 = torch.constant.int 1600
    %4138 = torch.prim.ListConstruct %int7_3328, %int1600_3329 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %4139 = torch.aten.reshape %4136, %4138 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3330 is not referenced in this excerpt; %int1_3331 is the alpha operand of the add.
    %int0_3330 = torch.constant.int 0
    %int1_3331 = torch.constant.int 1
    // Projection 1600 -> 1600: multiply by %486 ([1600,1600]) and add the [1600] tensor %487.
    %4140 = torch.aten.mm %4139, %486 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4141 = torch.aten.add.Tensor %4140, %487, %int1_3331 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4142 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3332 = torch.constant.int 1
    %int7_3333 = torch.constant.int 7
    %int1600_3334 = torch.constant.int 1600
    %4143 = torch.prim.ListConstruct %int1_3332, %int7_3333, %int1600_3334 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,1600] -> [1,7,1600].
    %4144 = torch.aten.reshape %4141, %4143 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual-style add with %4089, the tensor that was fed to the native_layer_norm
    // producing %result0_3278.
    %int1_3335 = torch.constant.int 1
    %4145 = torch.aten.add.Tensor %4144, %4089, %int1_3335 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4145 over its last dimension (size 1600), using the [1600]
    // tensors %488 / %489 as the weight / bias operands and an epsilon of ~1e-5, followed
    // by a 1600 -> 6400 projection. Only %result0_3338 is referenced in this excerpt.
    %float9.999990e-06_3336 = torch.constant.float 9.9999997473787516E-6
    %int1600_3337 = torch.constant.int 1600
    %4146 = torch.prim.ListConstruct %int1600_3337 : (!torch.int) -> !torch.list<int>
    %result0_3338, %result1_3339, %result2_3340 = torch.aten.native_layer_norm %4145, %4146, %488, %489, %float9.999990e-06_3336 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt.
    %4147 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3341 = torch.constant.int 7
    %int1600_3342 = torch.constant.int 1600
    %4148 = torch.prim.ListConstruct %int7_3341, %int1600_3342 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] for the 2-D mm.
    %4149 = torch.aten.reshape %result0_3338, %4148 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3343 is not referenced in this excerpt; %int1_3344 is the alpha operand of the add.
    %int0_3343 = torch.constant.int 0
    %int1_3344 = torch.constant.int 1
    // Projection 1600 -> 6400: multiply by %490 ([1600,6400]) and add the [6400] tensor %491.
    %4150 = torch.aten.mm %4149, %490 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4151 = torch.aten.add.Tensor %4150, %491, %int1_3344 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal [1, 7, 6400]; not referenced in this excerpt.
    %4152 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3345 = torch.constant.int 1
    %int7_3346 = torch.constant.int 7
    %int6400_3347 = torch.constant.int 6400
    %4153 = torch.prim.ListConstruct %int1_3345, %int7_3346, %int6400_3347 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,6400] -> [1,7,6400].
    %4154 = torch.aten.reshape %4151, %4153 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %4154, spelled out op by op:
    //   %4167 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh-approximation form of GELU (0.797884583 ~= sqrt(2/pi)).

    // 0.5 * x
    %4155 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4156 = torch.aten.mul.Tensor %4154, %4155 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %4157 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4158 = torch.aten.pow.Tensor_Tensor %4154, %4157 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %4159 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4160 = torch.aten.mul.Tensor %4158, %4159 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_3348 = torch.constant.int 1
    %4161 = torch.aten.add.Tensor %4154, %4160, %int1_3348 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 * (x + 0.044715 * x^3), then tanh
    %4162 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4163 = torch.aten.mul.Tensor %4161, %4162 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4164 = torch.aten.tanh %4163 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...)
    %4165 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3349 = torch.constant.int 1
    %4166 = torch.aten.add.Tensor %4164, %4165, %int1_3349 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %4167 = torch.aten.mul.Tensor %4156, %4166 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Project %4167 from 6400 back to 1600 features and add the result to %4145.

    // Shape literal [-1, 6400]; not referenced in this excerpt.
    %4168 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3350 = torch.constant.int 7
    %int6400_3351 = torch.constant.int 6400
    %4169 = torch.prim.ListConstruct %int7_3350, %int6400_3351 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,6400] -> [7,6400] for the 2-D mm.
    %4170 = torch.aten.reshape %4167, %4169 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3352 is not referenced in this excerpt; %int1_3353 is the alpha operand of the add.
    %int0_3352 = torch.constant.int 0
    %int1_3353 = torch.constant.int 1
    // Projection 6400 -> 1600: multiply by %492 ([6400,1600]) and add the [1600] tensor %493.
    %4171 = torch.aten.mm %4170, %492 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4172 = torch.aten.add.Tensor %4171, %493, %int1_3353 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4173 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3354 = torch.constant.int 1
    %int7_3355 = torch.constant.int 7
    %int1600_3356 = torch.constant.int 1600
    %4174 = torch.prim.ListConstruct %int1_3354, %int7_3355, %int1600_3356 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,1600] -> [1,7,1600].
    %4175 = torch.aten.reshape %4172, %4174 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual-style add with %4145, the tensor that was fed to the native_layer_norm
    // producing %result0_3338.
    %int1_3357 = torch.constant.int 1
    %4176 = torch.aten.add.Tensor %4145, %4175, %int1_3357 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4176 over its last dimension (size 1600), using the [1600]
    // tensors %494 / %495 as the weight / bias operands and an epsilon of ~1e-5.
    // Only %result0_3360 is referenced in this excerpt; the other two results are [1,7,1].
    %float9.999990e-06_3358 = torch.constant.float 9.9999997473787516E-6
    %int1600_3359 = torch.constant.int 1600
    %4177 = torch.prim.ListConstruct %int1600_3359 : (!torch.int) -> !torch.list<int>
    %result0_3360, %result1_3361, %result2_3362 = torch.aten.native_layer_norm %4176, %4177, %494, %495, %float9.999990e-06_3358 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt (the reshape below takes its
    // sizes from the ListConstruct [7, 1600] instead).
    %4178 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3363 = torch.constant.int 7
    %int1600_3364 = torch.constant.int 1600
    %4179 = torch.prim.ListConstruct %int7_3363, %int1600_3364 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] so the projection is a plain 2-D mm.
    %4180 = torch.aten.reshape %result0_3360, %4179 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3365 is not referenced in this excerpt; %int1_3366 is the alpha operand of the add.
    %int0_3365 = torch.constant.int 0
    %int1_3366 = torch.constant.int 1
    // Projection 1600 -> 4800: multiply by %496 ([1600,4800]) and add the [4800] tensor %497.
    %4181 = torch.aten.mm %4180, %496 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4182 = torch.aten.add.Tensor %4181, %497, %int1_3366 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal [1, 7, 4800]; not referenced in this excerpt.
    %4183 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3367 = torch.constant.int 1
    %int7_3368 = torch.constant.int 7
    %int4800_3369 = torch.constant.int 4800
    %4184 = torch.prim.ListConstruct %int1_3367, %int7_3368, %int4800_3369 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,4800] -> [1,7,4800].
    %4185 = torch.aten.reshape %4182, %4184 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes: a 3-element splat of 1600, converted to an int list.
    %4186 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4187 = torch.prim.tolist(%4186) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3370 = torch.constant.int 2
    // Split dim 2 (4800) into three [1,7,1600] chunks and unpack them as %4189#0..#2.
    // NOTE(review): presumably the query / key / value projections of an attention layer,
    // judging by how the chunks are consumed by later ops -- confirm against the source model.
    %4188 = torch.aten.split_with_sizes %4185, %4187, %int2_3370 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4189:3 = torch.prim.ListUnpack %4188 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three [1,7,1600] chunks %4189#0..#2 is reshaped to [1,7,25,64]
    // (25 * 64 = 1600) and then has dims 1 and 2 swapped, giving [1,25,7,64].
    // The three [1, 7, 25, 64] shape literals (%4190, %4194, %4198) are not referenced in
    // this excerpt; each reshape takes its sizes from the ListConstruct that follows.

    // Chunk #0 -> %4193.
    %4190 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3371 = torch.constant.int 1
    %int7_3372 = torch.constant.int 7
    %int25_3373 = torch.constant.int 25
    %int64_3374 = torch.constant.int 64
    %4191 = torch.prim.ListConstruct %int1_3371, %int7_3372, %int25_3373, %int64_3374 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4192 = torch.aten.reshape %4189#0, %4191 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3375 = torch.constant.int 1
    %int2_3376 = torch.constant.int 2
    %4193 = torch.aten.transpose.int %4192, %int1_3375, %int2_3376 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> %4197.
    %4194 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3377 = torch.constant.int 1
    %int7_3378 = torch.constant.int 7
    %int25_3379 = torch.constant.int 25
    %int64_3380 = torch.constant.int 64
    %4195 = torch.prim.ListConstruct %int1_3377, %int7_3378, %int25_3379, %int64_3380 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4196 = torch.aten.reshape %4189#1, %4195 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3381 = torch.constant.int 1
    %int2_3382 = torch.constant.int 2
    %4197 = torch.aten.transpose.int %4196, %int1_3381, %int2_3382 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> %4201.
    %4198 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3383 = torch.constant.int 1
    %int7_3384 = torch.constant.int 7
    %int25_3385 = torch.constant.int 25
    %int64_3386 = torch.constant.int 64
    %4199 = torch.prim.ListConstruct %int1_3383, %int7_3384, %int25_3385, %int64_3386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4200 = torch.aten.reshape %4189#2, %4199 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3387 = torch.constant.int 1
    %int2_3388 = torch.constant.int 2
    %4201 = torch.aten.transpose.int %4200, %int1_3387, %int2_3388 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Computes softmax(where(mask, (%4193 x %4196^T) / 64^0.5, -3.40282347E+38)) over the
    // last dimension, producing the [1,25,7,7] tensor %4218.

    // Same transpose of %4196 (dims 1 and 2) as the one that produces %4197.
    %int1_3389 = torch.constant.int 1
    %int2_3390 = torch.constant.int 2
    %4202 = torch.aten.transpose.int %4196, %int1_3389, %int2_3390 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims -> [1,25,64,7] so the matmul contracts over the size-64 dim.
    %int2_3391 = torch.constant.int 2
    %int3_3392 = torch.constant.int 3
    %4203 = torch.aten.transpose.int %4202, %int2_3391, %int3_3392 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Batched product [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %4204 = torch.aten.matmul %4193, %4203 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: 64 ** 0.5 as a 0-d tensor (64 is the size of the contracted dimension).
    %4205 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4206 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4207 = torch.aten.pow.Tensor_Tensor %4205, %4206 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4208 (empty shape literal) and %int0_3393 are not referenced in this excerpt.
    %4208 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3393 = torch.constant.int 0
    // Build a 0-d tensor filled with 0.0 and add the divisor to it (0.0 + 64 ** 0.5).
    %4209 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3394 = torch.constant.none
    %float0.000000e00_3395 = torch.constant.float 0.000000e+00
    %4210 = torch.aten.full %4209, %float0.000000e00_3395, %none_3394, %none_3394, %none_3394, %none_3394 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3396 = torch.constant.int 1
    %4211 = torch.aten.add.Tensor %4210, %4207, %int1_3396 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scale the [1,25,7,7] product by the 0-d divisor (broadcast).
    %4212 = torch.aten.div.Tensor %4204, %4211 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask whose contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) attention mask -- confirm.
    %4213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result are both f32, so the element type
    // is unchanged here.
    %int6_3397 = torch.constant.int 6
    %none_3398 = torch.constant.none
    %false_3399 = torch.constant.bool false
    %4214 = torch.aten.to.dtype %4212, %int6_3397, %false_3399, %false_3399, %none_3398 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the scaled value where the mask is true; elsewhere substitute -3.40282347E+38.
    %4215 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4216 = torch.aten.where.self %4213, %4214, %4215 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax over dim 3 (the last dimension), dtype argument left as none.
    %int3_3400 = torch.constant.int 3
    %none_3401 = torch.constant.none
    %4217 = torch.aten.softmax.int %4216, %int3_3400, %none_3401 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype with dtype code 6; again f32 -> f32.
    %int6_3402 = torch.constant.int 6
    %none_3403 = torch.constant.none
    %false_3404 = torch.constant.bool false
    %4218 = torch.aten.to.dtype %4217, %int6_3402, %false_3404, %false_3404, %none_3403 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Apply the [1,25,7,7] weights %4218 to %4201, merge the 25 x 64 dims back into 1600,
    // run a 1600 -> 1600 projection, and add %4176 to the result.

    // Batched product [1,25,7,7] x [1,25,7,64] -> [1,25,7,64].
    %4219 = torch.aten.matmul %4218, %4201 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back -> [1,7,25,64].
    %int1_3405 = torch.constant.int 1
    %int2_3406 = torch.constant.int 2
    %4220 = torch.aten.transpose.int %4219, %int1_3405, %int2_3406 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4221 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3407 = torch.constant.int 1
    %int7_3408 = torch.constant.int 7
    %int1600_3409 = torch.constant.int 1600
    %4222 = torch.prim.ListConstruct %int1_3407, %int7_3408, %int1600_3409 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Collapse the trailing 25 x 64 dims -> [1,7,1600].
    %4223 = torch.aten.reshape %4220, %4222 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt.
    %4224 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3410 = torch.constant.int 7
    %int1600_3411 = torch.constant.int 1600
    %4225 = torch.prim.ListConstruct %int7_3410, %int1600_3411 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten to [7,1600] for the 2-D mm.
    %4226 = torch.aten.reshape %4223, %4225 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3412 is not referenced in this excerpt; %int1_3413 is the alpha operand of the add.
    %int0_3412 = torch.constant.int 0
    %int1_3413 = torch.constant.int 1
    // Projection 1600 -> 1600: multiply by %498 ([1600,1600]) and add the [1600] tensor %499.
    %4227 = torch.aten.mm %4226, %498 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4228 = torch.aten.add.Tensor %4227, %499, %int1_3413 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4229 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3414 = torch.constant.int 1
    %int7_3415 = torch.constant.int 7
    %int1600_3416 = torch.constant.int 1600
    %4230 = torch.prim.ListConstruct %int1_3414, %int7_3415, %int1600_3416 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,1600] -> [1,7,1600].
    %4231 = torch.aten.reshape %4228, %4230 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual-style add with %4176, the tensor that was fed to the native_layer_norm
    // producing %result0_3360.
    %int1_3417 = torch.constant.int 1
    %4232 = torch.aten.add.Tensor %4231, %4176, %int1_3417 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4232 over its last dimension (size 1600), using the [1600]
    // tensors %500 / %501 as the weight / bias operands and an epsilon of ~1e-5, followed
    // by a 1600 -> 6400 projection. Only %result0_3420 is referenced in this excerpt.
    %float9.999990e-06_3418 = torch.constant.float 9.9999997473787516E-6
    %int1600_3419 = torch.constant.int 1600
    %4233 = torch.prim.ListConstruct %int1600_3419 : (!torch.int) -> !torch.list<int>
    %result0_3420, %result1_3421, %result2_3422 = torch.aten.native_layer_norm %4232, %4233, %500, %501, %float9.999990e-06_3418 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt.
    %4234 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3423 = torch.constant.int 7
    %int1600_3424 = torch.constant.int 1600
    %4235 = torch.prim.ListConstruct %int7_3423, %int1600_3424 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] for the 2-D mm.
    %4236 = torch.aten.reshape %result0_3420, %4235 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3425 is not referenced in this excerpt; %int1_3426 is the alpha operand of the add.
    %int0_3425 = torch.constant.int 0
    %int1_3426 = torch.constant.int 1
    // Projection 1600 -> 6400: multiply by %502 ([1600,6400]) and add the [6400] tensor %503.
    %4237 = torch.aten.mm %4236, %502 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4238 = torch.aten.add.Tensor %4237, %503, %int1_3426 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Shape literal [1, 7, 6400]; not referenced in this excerpt.
    %4239 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3427 = torch.constant.int 1
    %int7_3428 = torch.constant.int 7
    %int6400_3429 = torch.constant.int 6400
    %4240 = torch.prim.ListConstruct %int1_3427, %int7_3428, %int6400_3429 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,6400] -> [1,7,6400].
    %4241 = torch.aten.reshape %4238, %4240 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Elementwise activation on x = %4241, spelled out op by op:
    //   %4254 = (0.5 * x) * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // This is the tanh-approximation form of GELU (0.797884583 ~= sqrt(2/pi)).

    // 0.5 * x
    %4242 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4243 = torch.aten.mul.Tensor %4241, %4242 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %4244 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4245 = torch.aten.pow.Tensor_Tensor %4241, %4244 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %4246 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4247 = torch.aten.mul.Tensor %4245, %4246 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3
    %int1_3430 = torch.constant.int 1
    %4248 = torch.aten.add.Tensor %4241, %4247, %int1_3430 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 * (x + 0.044715 * x^3), then tanh
    %4249 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4250 = torch.aten.mul.Tensor %4248, %4249 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4251 = torch.aten.tanh %4250 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...)
    %4252 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3431 = torch.constant.int 1
    %4253 = torch.aten.add.Tensor %4251, %4252, %int1_3431 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // (0.5 * x) * (1 + tanh(...))
    %4254 = torch.aten.mul.Tensor %4243, %4253 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Project %4254 from 6400 back to 1600 features and add the result to %4232.

    // Shape literal [-1, 6400]; not referenced in this excerpt.
    %4255 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3432 = torch.constant.int 7
    %int6400_3433 = torch.constant.int 6400
    %4256 = torch.prim.ListConstruct %int7_3432, %int6400_3433 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,6400] -> [7,6400] for the 2-D mm.
    %4257 = torch.aten.reshape %4254, %4256 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3434 is not referenced in this excerpt; %int1_3435 is the alpha operand of the add.
    %int0_3434 = torch.constant.int 0
    %int1_3435 = torch.constant.int 1
    // Projection 6400 -> 1600: multiply by %504 ([6400,1600]) and add the [1600] tensor %505.
    %4258 = torch.aten.mm %4257, %504 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4259 = torch.aten.add.Tensor %4258, %505, %int1_3435 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Shape literal [1, 7, 1600]; not referenced in this excerpt.
    %4260 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3436 = torch.constant.int 1
    %int7_3437 = torch.constant.int 7
    %int1600_3438 = torch.constant.int 1600
    %4261 = torch.prim.ListConstruct %int1_3436, %int7_3437, %int1600_3438 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,1600] -> [1,7,1600].
    %4262 = torch.aten.reshape %4259, %4261 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual-style add with %4232, the tensor that was fed to the native_layer_norm
    // producing %result0_3420.
    %int1_3439 = torch.constant.int 1
    %4263 = torch.aten.add.Tensor %4232, %4262, %int1_3439 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4263 over its last dimension (size 1600), using the [1600]
    // tensors %506 / %507 as the weight / bias operands and an epsilon of ~1e-5.
    // Only %result0_3442 is referenced in this excerpt; the other two results are [1,7,1].
    %float9.999990e-06_3440 = torch.constant.float 9.9999997473787516E-6
    %int1600_3441 = torch.constant.int 1600
    %4264 = torch.prim.ListConstruct %int1600_3441 : (!torch.int) -> !torch.list<int>
    %result0_3442, %result1_3443, %result2_3444 = torch.aten.native_layer_norm %4263, %4264, %506, %507, %float9.999990e-06_3440 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Shape literal [-1, 1600]; not referenced in this excerpt (the reshape below takes its
    // sizes from the ListConstruct [7, 1600] instead).
    %4265 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3445 = torch.constant.int 7
    %int1600_3446 = torch.constant.int 1600
    %4266 = torch.prim.ListConstruct %int7_3445, %int1600_3446 : (!torch.int, !torch.int) -> !torch.list<int>
    // Flatten [1,7,1600] -> [7,1600] so the projection is a plain 2-D mm.
    %4267 = torch.aten.reshape %result0_3442, %4266 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3447 is not referenced in this excerpt; %int1_3448 is the alpha operand of the add.
    %int0_3447 = torch.constant.int 0
    %int1_3448 = torch.constant.int 1
    // Projection 1600 -> 4800: multiply by %508 ([1600,4800]) and add the [4800] tensor %509.
    %4268 = torch.aten.mm %4267, %508 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4269 = torch.aten.add.Tensor %4268, %509, %int1_3448 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // Shape literal [1, 7, 4800]; not referenced in this excerpt.
    %4270 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3449 = torch.constant.int 1
    %int7_3450 = torch.constant.int 7
    %int4800_3451 = torch.constant.int 4800
    %4271 = torch.prim.ListConstruct %int1_3449, %int7_3450, %int4800_3451 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Restore the leading dimension: [7,4800] -> [1,7,4800].
    %4272 = torch.aten.reshape %4269, %4271 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes: a 3-element splat of 1600, converted to an int list.
    %4273 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4274 = torch.prim.tolist(%4273) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3452 = torch.constant.int 2
    // Split dim 2 (4800) into three [1,7,1600] chunks and unpack them as %4276#0..#2.
    // NOTE(review): presumably the query / key / value projections of an attention layer,
    // judging by how the chunks are consumed by later ops -- confirm against the source model.
    %4275 = torch.aten.split_with_sizes %4272, %4274, %int2_3452 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4276:3 = torch.prim.ListUnpack %4275 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Each of the three [1,7,1600] chunks %4276#0..#2 is reshaped to [1,7,25,64]
    // (25 * 64 = 1600) and then has dims 1 and 2 swapped, giving [1,25,7,64].
    // The three [1, 7, 25, 64] shape literals (%4277, %4281, %4285) are not referenced in
    // this excerpt; each reshape takes its sizes from the ListConstruct that follows.

    // Chunk #0 -> %4280.
    %4277 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3453 = torch.constant.int 1
    %int7_3454 = torch.constant.int 7
    %int25_3455 = torch.constant.int 25
    %int64_3456 = torch.constant.int 64
    %4278 = torch.prim.ListConstruct %int1_3453, %int7_3454, %int25_3455, %int64_3456 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4279 = torch.aten.reshape %4276#0, %4278 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3457 = torch.constant.int 1
    %int2_3458 = torch.constant.int 2
    %4280 = torch.aten.transpose.int %4279, %int1_3457, %int2_3458 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> %4284.
    %4281 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3459 = torch.constant.int 1
    %int7_3460 = torch.constant.int 7
    %int25_3461 = torch.constant.int 25
    %int64_3462 = torch.constant.int 64
    %4282 = torch.prim.ListConstruct %int1_3459, %int7_3460, %int25_3461, %int64_3462 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4283 = torch.aten.reshape %4276#1, %4282 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3463 = torch.constant.int 1
    %int2_3464 = torch.constant.int 2
    %4284 = torch.aten.transpose.int %4283, %int1_3463, %int2_3464 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> %4288.
    %4285 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3465 = torch.constant.int 1
    %int7_3466 = torch.constant.int 7
    %int25_3467 = torch.constant.int 25
    %int64_3468 = torch.constant.int 64
    %4286 = torch.prim.ListConstruct %int1_3465, %int7_3466, %int25_3467, %int64_3468 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4287 = torch.aten.reshape %4276#2, %4286 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3469 = torch.constant.int 1
    %int2_3470 = torch.constant.int 2
    %4288 = torch.aten.transpose.int %4287, %int1_3469, %int2_3470 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %4280 x transpose(%4283), scaled, masked,
    // softmaxed along the last dim, then multiplied by %4288.

    // %4289 repeats the dim-1/dim-2 transpose of %4283 that %4284 already
    // computes; the second transpose swaps dims 2 and 3 to give [1,25,64,7].
    %int1_3471 = torch.constant.int 1
    %int2_3472 = torch.constant.int 2
    %4289 = torch.aten.transpose.int %4283, %int1_3471, %int2_3472 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3473 = torch.constant.int 2
    %int3_3474 = torch.constant.int 3
    %4290 = torch.aten.transpose.int %4289, %int2_3473, %int3_3474 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %4291 = torch.aten.matmul %4280, %4290 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64 ** 0.5, computed at runtime as a rank-0 tensor.
    %4292 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4293 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4294 = torch.aten.pow.Tensor_Tensor %4292, %4293 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE: %4295 and %int0_3475 are not referenced in the visible code.
    %4295 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3475 = torch.constant.int 0
    // aten.full with an empty size list builds a rank-0 tensor holding 0.0; the
    // scale is added to it (alpha = 1), so %4298 carries the same value as %4294.
    %4296 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3476 = torch.constant.none
    %float0.000000e00_3477 = torch.constant.float 0.000000e+00
    %4297 = torch.aten.full %4296, %float0.000000e00_3477, %none_3476, %none_3476, %none_3476, %none_3476 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3478 = torch.constant.int 1
    %4298 = torch.aten.add.Tensor %4297, %4294, %int1_3478 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %4299 = torch.aten.div.Tensor %4291, %4298 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask - confirm
    // against the un-elided file.
    %4300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // Cast with dtype operand 6; operand and result are both f32, so the element
    // type does not change.
    %int6_3479 = torch.constant.int 6
    %none_3480 = torch.constant.none
    %false_3481 = torch.constant.bool false
    %4301 = torch.aten.to.dtype %4299, %int6_3479, %false_3481, %false_3481, %none_3480 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38, the most negative finite f32.
    %4302 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4303 = torch.aten.where.self %4300, %4301, %4302 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 with no dtype override (none).
    %int3_3482 = torch.constant.int 3
    %none_3483 = torch.constant.none
    %4304 = torch.aten.softmax.int %4303, %int3_3482, %none_3483 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 cast (dtype operand 6), then weights x %4288.
    %int6_3484 = torch.constant.int 6
    %none_3485 = torch.constant.none
    %false_3486 = torch.constant.bool false
    %4305 = torch.aten.to.dtype %4304, %int6_3484, %false_3486, %false_3486, %none_3485 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %4306 = torch.aten.matmul %4305, %4288 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25x64 groups back into one 1600-wide dim:
    // [1,25,7,64] -> [1,7,25,64] -> [1,7,1600] -> [7,1600].
    %int1_3487 = torch.constant.int 1
    %int2_3488 = torch.constant.int 2
    %4307 = torch.aten.transpose.int %4306, %int1_3487, %int2_3488 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // NOTE: the shape literals %4308, %4311 and %4316 in this range are not
    // referenced in the visible code; the reshapes use the ListConstruct values.
    %4308 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3489 = torch.constant.int 1
    %int7_3490 = torch.constant.int 7
    %int1600_3491 = torch.constant.int 1600
    %4309 = torch.prim.ListConstruct %int1_3489, %int7_3490, %int1600_3491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4310 = torch.aten.reshape %4307, %4309 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // The literal holds [-1, 1600], but the reshape is given the static list
    // [7, 1600].
    %4311 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3492 = torch.constant.int 7
    %int1600_3493 = torch.constant.int 1600
    %4312 = torch.prim.ListConstruct %int7_3492, %int1600_3493 : (!torch.int, !torch.int) -> !torch.list<int>
    %4313 = torch.aten.reshape %4310, %4312 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Output projection: [7,1600] x %510 [1600,1600], plus bias %511 (alpha = 1).
    // NOTE: %int0_3494 is not referenced in the visible code.
    %int0_3494 = torch.constant.int 0
    %int1_3495 = torch.constant.int 1
    %4314 = torch.aten.mm %4313, %510 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4315 = torch.aten.add.Tensor %4314, %511, %int1_3495 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4316 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3496 = torch.constant.int 1
    %int7_3497 = torch.constant.int 7
    %int1600_3498 = torch.constant.int 1600
    %4317 = torch.prim.ListConstruct %int1_3496, %int7_3497, %int1600_3498 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4318 = torch.aten.reshape %4315, %4317 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add with %4263, which is defined before this range.
    // NOTE(review): presumably the input to this block's layer norm - confirm.
    %int1_3499 = torch.constant.int 1
    %4319 = torch.aten.add.Tensor %4318, %4263, %int1_3499 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm over the last dim (normalized_shape = [1600]) of %4319, with
    // weight %512, bias %513 and eps 9.9999997473787516E-6.
    // Only the first result is used in the visible code; the two [1,7,1]
    // results (%result1_3503, %result2_3504) are not referenced.
    %float9.999990e-06_3500 = torch.constant.float 9.9999997473787516E-6
    %int1600_3501 = torch.constant.int 1600
    %4320 = torch.prim.ListConstruct %int1600_3501 : (!torch.int) -> !torch.list<int>
    %result0_3502, %result1_3503, %result2_3504 = torch.aten.native_layer_norm %4319, %4320, %512, %513, %float9.999990e-06_3500 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600]. NOTE: the shape literals %4321 and %4326 and the
    // constant %int0_3507 are not referenced in the visible code.
    %4321 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3505 = torch.constant.int 7
    %int1600_3506 = torch.constant.int 1600
    %4322 = torch.prim.ListConstruct %int7_3505, %int1600_3506 : (!torch.int, !torch.int) -> !torch.list<int>
    %4323 = torch.aten.reshape %result0_3502, %4322 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Expansion projection: [7,1600] x %514 [1600,6400], plus bias %515
    // (alpha = 1), then back to [1,7,6400].
    %int0_3507 = torch.constant.int 0
    %int1_3508 = torch.constant.int 1
    %4324 = torch.aten.mm %4323, %514 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4325 = torch.aten.add.Tensor %4324, %515, %int1_3508 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %4326 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3509 = torch.constant.int 1
    %int7_3510 = torch.constant.int 7
    %int6400_3511 = torch.constant.int 6400
    %4327 = torch.prim.ListConstruct %int1_3509, %int7_3510, %int6400_3511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4328 = torch.aten.reshape %4325, %4327 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-form GELU applied elementwise to x = %4328, spelled out op by op:
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // %4330 = 0.5 * x
    %4329 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4330 = torch.aten.mul.Tensor %4328, %4329 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4332 = x^3 (pow with a rank-0 exponent tensor holding 3.0)
    %4331 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4332 = torch.aten.pow.Tensor_Tensor %4328, %4331 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4335 = x + 0.044715 * x^3
    %4333 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4334 = torch.aten.mul.Tensor %4332, %4333 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3512 = torch.constant.int 1
    %4335 = torch.aten.add.Tensor %4328, %4334, %int1_3512 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is sqrt(2/pi) rounded to f32.
    %4336 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4337 = torch.aten.mul.Tensor %4335, %4336 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4338 = torch.aten.tanh %4337 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4340 = 1 + tanh(...); %4341 = (0.5 * x) * (1 + tanh(...))
    %4339 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3513 = torch.constant.int 1
    %4340 = torch.aten.add.Tensor %4338, %4339, %int1_3513 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %4341 = torch.aten.mul.Tensor %4330, %4340 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Contraction projection: flatten the activation output %4341 to [7,6400],
    // multiply by %516 [6400,1600], add bias %517 (alpha = 1), and restore
    // [1,7,1600].
    // NOTE: the shape literals %4342 and %4347 and the constant %int0_3516 are
    // not referenced in the visible code.
    %4342 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3514 = torch.constant.int 7
    %int6400_3515 = torch.constant.int 6400
    %4343 = torch.prim.ListConstruct %int7_3514, %int6400_3515 : (!torch.int, !torch.int) -> !torch.list<int>
    %4344 = torch.aten.reshape %4341, %4343 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3516 = torch.constant.int 0
    %int1_3517 = torch.constant.int 1
    %4345 = torch.aten.mm %4344, %516 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4346 = torch.aten.add.Tensor %4345, %517, %int1_3517 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4347 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3518 = torch.constant.int 1
    %int7_3519 = torch.constant.int 7
    %int1600_3520 = torch.constant.int 1600
    %4348 = torch.prim.ListConstruct %int1_3518, %int7_3519, %int1600_3520 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4349 = torch.aten.reshape %4346, %4348 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add: %4319 (the tensor fed to the preceding layer norm) plus the
    // projected output. %4350 is the input to the next layer norm.
    %int1_3521 = torch.constant.int 1
    %4350 = torch.aten.add.Tensor %4319, %4349, %int1_3521 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Start of the next transformer block.
    // Layer norm over the last dim (normalized_shape = [1600]) of %4350, with
    // weight %518, bias %519 and eps 9.9999997473787516E-6. Only the first
    // result is used in the visible code.
    %float9.999990e-06_3522 = torch.constant.float 9.9999997473787516E-6
    %int1600_3523 = torch.constant.int 1600
    %4351 = torch.prim.ListConstruct %int1600_3523 : (!torch.int) -> !torch.list<int>
    %result0_3524, %result1_3525, %result2_3526 = torch.aten.native_layer_norm %4350, %4351, %518, %519, %float9.999990e-06_3522 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600]. NOTE: the shape literals %4352 and %4357 and the
    // constant %int0_3529 are not referenced in the visible code.
    %4352 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3527 = torch.constant.int 7
    %int1600_3528 = torch.constant.int 1600
    %4353 = torch.prim.ListConstruct %int7_3527, %int1600_3528 : (!torch.int, !torch.int) -> !torch.list<int>
    %4354 = torch.aten.reshape %result0_3524, %4353 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Fused projection: [7,1600] x %520 [1600,4800], plus bias %521 (alpha = 1),
    // then back to [1,7,4800].
    %int0_3529 = torch.constant.int 0
    %int1_3530 = torch.constant.int 1
    %4355 = torch.aten.mm %4354, %520 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4356 = torch.aten.add.Tensor %4355, %521, %int1_3530 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4357 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3531 = torch.constant.int 1
    %int7_3532 = torch.constant.int 7
    %int4800_3533 = torch.constant.int 4800
    %4358 = torch.prim.ListConstruct %int1_3531, %int7_3532, %int4800_3533 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4359 = torch.aten.reshape %4356, %4358 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split dim 2 into three 1600-wide tensors (sizes are the splat
    // [1600, 1600, 1600]). Further down, #0 is the left operand of the score
    // matmul, #1 is transposed into its right operand, and #2 is multiplied by
    // the softmax output.
    %4360 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4361 = torch.prim.tolist(%4360) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3534 = torch.constant.int 2
    %4362 = torch.aten.split_with_sizes %4359, %4361, %int2_3534 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4363:3 = torch.prim.ListUnpack %4362 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each 1600-wide chunk into 25 groups of width 64 and swap dims 1
    // and 2: [1,7,1600] -> [1,7,25,64] -> [1,25,7,64].
    // NOTE: the shape literals %4364, %4368 and %4372 are not referenced in the
    // visible code; every reshape takes its shape from a ListConstruct.

    // Chunk #0 of the split.
    %4364 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3535 = torch.constant.int 1
    %int7_3536 = torch.constant.int 7
    %int25_3537 = torch.constant.int 25
    %int64_3538 = torch.constant.int 64
    %4365 = torch.prim.ListConstruct %int1_3535, %int7_3536, %int25_3537, %int64_3538 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4366 = torch.aten.reshape %4363#0, %4365 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3539 = torch.constant.int 1
    %int2_3540 = torch.constant.int 2
    %4367 = torch.aten.transpose.int %4366, %int1_3539, %int2_3540 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 of the split.
    %4368 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3541 = torch.constant.int 1
    %int7_3542 = torch.constant.int 7
    %int25_3543 = torch.constant.int 25
    %int64_3544 = torch.constant.int 64
    %4369 = torch.prim.ListConstruct %int1_3541, %int7_3542, %int25_3543, %int64_3544 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4370 = torch.aten.reshape %4363#1, %4369 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3545 = torch.constant.int 1
    %int2_3546 = torch.constant.int 2
    // NOTE(review): %4371 is not used in the visible range; its type matches the
    // function's [1,25,7,64] results, so it is presumably returned - confirm.
    %4371 = torch.aten.transpose.int %4370, %int1_3545, %int2_3546 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 of the split.
    %4372 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3547 = torch.constant.int 1
    %int7_3548 = torch.constant.int 7
    %int25_3549 = torch.constant.int 25
    %int64_3550 = torch.constant.int 64
    %4373 = torch.prim.ListConstruct %int1_3547, %int7_3548, %int25_3549, %int64_3550 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4374 = torch.aten.reshape %4363#2, %4373 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3551 = torch.constant.int 1
    %int2_3552 = torch.constant.int 2
    %4375 = torch.aten.transpose.int %4374, %int1_3551, %int2_3552 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Attention core: scores = %4367 x transpose(%4370), scaled, masked,
    // softmaxed along the last dim, then multiplied by %4375.

    // %4376 repeats the dim-1/dim-2 transpose of %4370 that %4371 already
    // computes; the second transpose swaps dims 2 and 3 to give [1,25,64,7].
    %int1_3553 = torch.constant.int 1
    %int2_3554 = torch.constant.int 2
    %4376 = torch.aten.transpose.int %4370, %int1_3553, %int2_3554 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3555 = torch.constant.int 2
    %int3_3556 = torch.constant.int 3
    %4377 = torch.aten.transpose.int %4376, %int2_3555, %int3_3556 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %4378 = torch.aten.matmul %4367, %4377 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: 64 ** 0.5, computed at runtime as a rank-0 tensor.
    %4379 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4380 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4381 = torch.aten.pow.Tensor_Tensor %4379, %4380 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // NOTE: %4382 and %int0_3557 are not referenced in the visible code.
    %4382 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3557 = torch.constant.int 0
    // aten.full with an empty size list builds a rank-0 tensor holding 0.0; the
    // scale is added to it (alpha = 1), so %4385 carries the same value as %4381.
    %4383 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3558 = torch.constant.none
    %float0.000000e00_3559 = torch.constant.float 0.000000e+00
    %4384 = torch.aten.full %4383, %float0.000000e00_3559, %none_3558, %none_3558, %none_3558, %none_3558 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3560 = torch.constant.int 1
    %4385 = torch.aten.add.Tensor %4384, %4381, %int1_3560 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    %4386 = torch.aten.div.Tensor %4378, %4385 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask; its contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask - confirm
    // against the un-elided file.
    %4387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // Cast with dtype operand 6; operand and result are both f32, so the element
    // type does not change.
    %int6_3561 = torch.constant.int 6
    %none_3562 = torch.constant.none
    %false_3563 = torch.constant.bool false
    %4388 = torch.aten.to.dtype %4386, %int6_3561, %false_3563, %false_3563, %none_3562 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Keep the score where the mask is true; elsewhere substitute
    // -3.40282347E+38, the most negative finite f32.
    %4389 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4390 = torch.aten.where.self %4387, %4388, %4389 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 with no dtype override (none).
    %int3_3564 = torch.constant.int 3
    %none_3565 = torch.constant.none
    %4391 = torch.aten.softmax.int %4390, %int3_3564, %none_3565 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 cast (dtype operand 6), then weights x %4375.
    %int6_3566 = torch.constant.int 6
    %none_3567 = torch.constant.none
    %false_3568 = torch.constant.bool false
    %4392 = torch.aten.to.dtype %4391, %int6_3566, %false_3568, %false_3568, %none_3567 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %4393 = torch.aten.matmul %4392, %4375 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Merge the 25x64 groups back into one 1600-wide dim:
    // [1,25,7,64] -> [1,7,25,64] -> [1,7,1600] -> [7,1600].
    %int1_3569 = torch.constant.int 1
    %int2_3570 = torch.constant.int 2
    %4394 = torch.aten.transpose.int %4393, %int1_3569, %int2_3570 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // NOTE: the shape literals %4395, %4398 and %4403 in this range are not
    // referenced in the visible code; the reshapes use the ListConstruct values.
    %4395 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3571 = torch.constant.int 1
    %int7_3572 = torch.constant.int 7
    %int1600_3573 = torch.constant.int 1600
    %4396 = torch.prim.ListConstruct %int1_3571, %int7_3572, %int1600_3573 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4397 = torch.aten.reshape %4394, %4396 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // The literal holds [-1, 1600], but the reshape is given the static list
    // [7, 1600].
    %4398 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3574 = torch.constant.int 7
    %int1600_3575 = torch.constant.int 1600
    %4399 = torch.prim.ListConstruct %int7_3574, %int1600_3575 : (!torch.int, !torch.int) -> !torch.list<int>
    %4400 = torch.aten.reshape %4397, %4399 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Output projection: [7,1600] x %522 [1600,1600], plus bias %523 (alpha = 1).
    // NOTE: %int0_3576 is not referenced in the visible code.
    %int0_3576 = torch.constant.int 0
    %int1_3577 = torch.constant.int 1
    %4401 = torch.aten.mm %4400, %522 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4402 = torch.aten.add.Tensor %4401, %523, %int1_3577 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4403 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3578 = torch.constant.int 1
    %int7_3579 = torch.constant.int 7
    %int1600_3580 = torch.constant.int 1600
    %4404 = torch.prim.ListConstruct %int1_3578, %int7_3579, %int1600_3580 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4405 = torch.aten.reshape %4402, %4404 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add with %4350, the tensor that was fed to this block's first
    // layer norm.
    %int1_3581 = torch.constant.int 1
    %4406 = torch.aten.add.Tensor %4405, %4350, %int1_3581 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer norm over the last dim (normalized_shape = [1600]) of %4406, with
    // weight %524, bias %525 and eps 9.9999997473787516E-6.
    // Only the first result is used in the visible code; the two [1,7,1]
    // results (%result1_3585, %result2_3586) are not referenced.
    %float9.999990e-06_3582 = torch.constant.float 9.9999997473787516E-6
    %int1600_3583 = torch.constant.int 1600
    %4407 = torch.prim.ListConstruct %int1600_3583 : (!torch.int) -> !torch.list<int>
    %result0_3584, %result1_3585, %result2_3586 = torch.aten.native_layer_norm %4406, %4407, %524, %525, %float9.999990e-06_3582 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600]. NOTE: the shape literals %4408 and %4413 and the
    // constant %int0_3589 are not referenced in the visible code.
    %4408 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3587 = torch.constant.int 7
    %int1600_3588 = torch.constant.int 1600
    %4409 = torch.prim.ListConstruct %int7_3587, %int1600_3588 : (!torch.int, !torch.int) -> !torch.list<int>
    %4410 = torch.aten.reshape %result0_3584, %4409 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Expansion projection: [7,1600] x %526 [1600,6400], plus bias %527
    // (alpha = 1), then back to [1,7,6400].
    %int0_3589 = torch.constant.int 0
    %int1_3590 = torch.constant.int 1
    %4411 = torch.aten.mm %4410, %526 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4412 = torch.aten.add.Tensor %4411, %527, %int1_3590 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    %4413 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3591 = torch.constant.int 1
    %int7_3592 = torch.constant.int 7
    %int6400_3593 = torch.constant.int 6400
    %4414 = torch.prim.ListConstruct %int1_3591, %int7_3592, %int6400_3593 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4415 = torch.aten.reshape %4412, %4414 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-form GELU applied elementwise to x = %4415, spelled out op by op:
    //   0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // %4417 = 0.5 * x
    %4416 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4417 = torch.aten.mul.Tensor %4415, %4416 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4419 = x^3 (pow with a rank-0 exponent tensor holding 3.0)
    %4418 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4419 = torch.aten.pow.Tensor_Tensor %4415, %4418 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4422 = x + 0.044715 * x^3
    %4420 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4421 = torch.aten.mul.Tensor %4419, %4420 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3594 = torch.constant.int 1
    %4422 = torch.aten.add.Tensor %4415, %4421, %int1_3594 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // 0.797884583 is sqrt(2/pi) rounded to f32.
    %4423 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4424 = torch.aten.mul.Tensor %4422, %4423 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4425 = torch.aten.tanh %4424 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4427 = 1 + tanh(...); %4428 = (0.5 * x) * (1 + tanh(...))
    %4426 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3595 = torch.constant.int 1
    %4427 = torch.aten.add.Tensor %4425, %4426, %int1_3595 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %4428 = torch.aten.mul.Tensor %4417, %4427 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Contraction projection: flatten the activation output %4428 to [7,6400],
    // multiply by %528 [6400,1600], add bias %529 (alpha = 1), and restore
    // [1,7,1600].
    // NOTE: the shape literals %4429 and %4434 and the constant %int0_3598 are
    // not referenced in the visible code.
    %4429 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3596 = torch.constant.int 7
    %int6400_3597 = torch.constant.int 6400
    %4430 = torch.prim.ListConstruct %int7_3596, %int6400_3597 : (!torch.int, !torch.int) -> !torch.list<int>
    %4431 = torch.aten.reshape %4428, %4430 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    %int0_3598 = torch.constant.int 0
    %int1_3599 = torch.constant.int 1
    %4432 = torch.aten.mm %4431, %528 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4433 = torch.aten.add.Tensor %4432, %529, %int1_3599 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    %4434 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3600 = torch.constant.int 1
    %int7_3601 = torch.constant.int 7
    %int1600_3602 = torch.constant.int 1600
    %4435 = torch.prim.ListConstruct %int1_3600, %int7_3601, %int1600_3602 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4436 = torch.aten.reshape %4433, %4435 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add: %4406 (the tensor fed to the preceding layer norm) plus the
    // projected output. %4437 is the input to the next layer norm.
    %int1_3603 = torch.constant.int 1
    %4437 = torch.aten.add.Tensor %4406, %4436, %int1_3603 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Start of the next transformer block.
    // Layer norm over the last dim (normalized_shape = [1600]) of %4437, with
    // weight %530, bias %531 and eps 9.9999997473787516E-6. Only the first
    // result is used in the visible code.
    %float9.999990e-06_3604 = torch.constant.float 9.9999997473787516E-6
    %int1600_3605 = torch.constant.int 1600
    %4438 = torch.prim.ListConstruct %int1600_3605 : (!torch.int) -> !torch.list<int>
    %result0_3606, %result1_3607, %result2_3608 = torch.aten.native_layer_norm %4437, %4438, %530, %531, %float9.999990e-06_3604 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Flatten to [7,1600]. NOTE: the shape literals %4439 and %4444 and the
    // constant %int0_3611 are not referenced in the visible code.
    %4439 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3609 = torch.constant.int 7
    %int1600_3610 = torch.constant.int 1600
    %4440 = torch.prim.ListConstruct %int7_3609, %int1600_3610 : (!torch.int, !torch.int) -> !torch.list<int>
    %4441 = torch.aten.reshape %result0_3606, %4440 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // Fused projection: [7,1600] x %532 [1600,4800], plus bias %533 (alpha = 1),
    // then back to [1,7,4800].
    %int0_3611 = torch.constant.int 0
    %int1_3612 = torch.constant.int 1
    %4442 = torch.aten.mm %4441, %532 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4443 = torch.aten.add.Tensor %4442, %533, %int1_3612 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4444 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3613 = torch.constant.int 1
    %int7_3614 = torch.constant.int 7
    %int4800_3615 = torch.constant.int 4800
    %4445 = torch.prim.ListConstruct %int1_3613, %int7_3614, %int4800_3615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4446 = torch.aten.reshape %4443, %4445 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split dim 2 into three 1600-wide tensors (sizes are the splat
    // [1600, 1600, 1600]). Further down, #0 is the left operand of the score
    // matmul and #1 is transposed into its right operand.
    %4447 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4448 = torch.prim.tolist(%4447) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3616 = torch.constant.int 2
    %4449 = torch.aten.split_with_sizes %4446, %4448, %int2_3616 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4450:3 = torch.prim.ListUnpack %4449 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Reshape each 1600-wide chunk into 25 groups of width 64 and swap dims 1
    // and 2: [1,7,1600] -> [1,7,25,64] -> [1,25,7,64].
    // NOTE: the shape literals %4451, %4455 and %4459 are not referenced in the
    // visible code; every reshape takes its shape from a ListConstruct.

    // Chunk #0 of the split.
    %4451 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3617 = torch.constant.int 1
    %int7_3618 = torch.constant.int 7
    %int25_3619 = torch.constant.int 25
    %int64_3620 = torch.constant.int 64
    %4452 = torch.prim.ListConstruct %int1_3617, %int7_3618, %int25_3619, %int64_3620 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4453 = torch.aten.reshape %4450#0, %4452 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3621 = torch.constant.int 1
    %int2_3622 = torch.constant.int 2
    %4454 = torch.aten.transpose.int %4453, %int1_3621, %int2_3622 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 of the split.
    %4455 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3623 = torch.constant.int 1
    %int7_3624 = torch.constant.int 7
    %int25_3625 = torch.constant.int 25
    %int64_3626 = torch.constant.int 64
    %4456 = torch.prim.ListConstruct %int1_3623, %int7_3624, %int25_3625, %int64_3626 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4457 = torch.aten.reshape %4450#1, %4456 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3627 = torch.constant.int 1
    %int2_3628 = torch.constant.int 2
    // NOTE(review): %4458 is not used in the visible range; its type matches the
    // function's [1,25,7,64] results, so it is presumably returned - confirm.
    %4458 = torch.aten.transpose.int %4457, %int1_3627, %int2_3628 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 of the split. %4462 is not used before the visible range ends.
    %4459 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3629 = torch.constant.int 1
    %int7_3630 = torch.constant.int 7
    %int25_3631 = torch.constant.int 25
    %int64_3632 = torch.constant.int 64
    %4460 = torch.prim.ListConstruct %int1_3629, %int7_3630, %int25_3631, %int64_3632 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4461 = torch.aten.reshape %4450#2, %4460 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3633 = torch.constant.int 1
    %int2_3634 = torch.constant.int 2
    %4462 = torch.aten.transpose.int %4461, %int1_3633, %int2_3634 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matrix: %4454 x transpose(%4457-derived tensor), then division by 64^0.5.
    // %4463 repeats the same dim-1/dim-2 transpose of %4457 that already produced %4458.
    %int1_3635 = torch.constant.int 1
    %int2_3636 = torch.constant.int 2
    %4463 = torch.aten.transpose.int %4457, %int1_3635, %int2_3636 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %int2_3637 = torch.constant.int 2
    %int3_3638 = torch.constant.int 3
    %4464 = torch.aten.transpose.int %4463, %int2_3637, %int3_3638 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %4465 = torch.aten.matmul %4454, %4464 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: pow(64.0, 0.5) evaluated as a rank-0 tensor op.
    %4466 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4467 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4468 = torch.aten.pow.Tensor_Tensor %4466, %4467 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4469 and %int0_3639 are not referenced by any op visible in this excerpt.
    %4469 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3639 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0 (empty size list), then 0.0 + 1 * %4468: the sum carries
    // the same value as %4468.
    %4470 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3640 = torch.constant.none
    %float0.000000e00_3641 = torch.constant.float 0.000000e+00
    %4471 = torch.aten.full %4470, %float0.000000e00_3641, %none_3640, %none_3640, %none_3640, %none_3640 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3642 = torch.constant.int 1
    %4472 = torch.aten.add.Tensor %4471, %4468, %int1_3642 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores, [1,25,7,7]; the rank-0 divisor broadcasts.
    %4473 = torch.aten.div.Tensor %4465, %4472 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask select followed by softmax over the last dim.
    // Boolean mask [1,1,7,7]; its contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask -- cannot be confirmed from here.
    %4474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype argument 6; operand and result are both f32, so the element type
    // is unchanged.
    %int6_3643 = torch.constant.int 6
    %none_3644 = torch.constant.none
    %false_3645 = torch.constant.bool false
    %4475 = torch.aten.to.dtype %4473, %int6_3643, %false_3645, %false_3645, %none_3644 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise use -3.40282347E+38 (the most
    // negative finite float32). The mask broadcasts over dim 1.
    %4476 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4477 = torch.aten.where.self %4474, %4475, %4476 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3, no explicit dtype (none).
    %int3_3646 = torch.constant.int 3
    %none_3647 = torch.constant.none
    %4478 = torch.aten.softmax.int %4477, %int3_3646, %none_3647 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; element type again unchanged.
    %int6_3648 = torch.constant.int 6
    %none_3649 = torch.constant.none
    %false_3650 = torch.constant.bool false
    %4479 = torch.aten.to.dtype %4478, %int6_3648, %false_3650, %false_3650, %none_3649 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: softmax output %4479 [1,25,7,7] x %4462 [1,25,7,64] -> [1,25,7,64].
    %4480 = torch.aten.matmul %4479, %4462 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the per-head layout: swap dims 1 and 2 back, then flatten 25 x 64 into 1600.
    %int1_3651 = torch.constant.int 1
    %int2_3652 = torch.constant.int 2
    %4481 = torch.aten.transpose.int %4480, %int1_3651, %int2_3652 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %4482 is not consumed in the visible lines; the reshape uses the int list %4483.
    %4482 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3653 = torch.constant.int 1
    %int7_3654 = torch.constant.int 7
    %int1600_3655 = torch.constant.int 1600
    %4483 = torch.prim.ListConstruct %int1_3653, %int7_3654, %int1600_3655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4484 = torch.aten.reshape %4481, %4483 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 followed by an add with %4437.
    // The literal %4485 carries [-1, 1600] but is not consumed in the visible lines; the
    // reshape below uses the static list [7, 1600].
    %4485 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3656 = torch.constant.int 7
    %int1600_3657 = torch.constant.int 1600
    %4486 = torch.prim.ListConstruct %int7_3656, %int1600_3657 : (!torch.int, !torch.int) -> !torch.list<int>
    // Drop the leading size-1 dim so a 2-D mm can be used.
    %4487 = torch.aten.reshape %4484, %4486 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3658 is not referenced in the visible lines; %int1_3659 is the alpha of the add.
    %int0_3658 = torch.constant.int 0
    %int1_3659 = torch.constant.int 1
    // x @ %534 + %535; %534 ([1600,1600]) and %535 ([1600]) are defined above this excerpt.
    %4488 = torch.aten.mm %4487, %534 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4489 = torch.aten.add.Tensor %4488, %535, %int1_3659 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the [1,7,1600] shape.
    %4490 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3660 = torch.constant.int 1
    %int7_3661 = torch.constant.int 7
    %int1600_3662 = torch.constant.int 1600
    %4491 = torch.prim.ListConstruct %int1_3660, %int7_3661, %int1600_3662 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4492 = torch.aten.reshape %4489, %4491 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Elementwise add (alpha = 1) with %4437, which is defined above this excerpt.
    // NOTE(review): presumably the residual connection around the attention sub-layer -- confirm.
    %int1_3663 = torch.constant.int 1
    %4493 = torch.aten.add.Tensor %4492, %4437, %int1_3663 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4493 over the last dim (normalized_shape = [1600]) with
    // weight %536 and bias %537 (both [1600], defined above this excerpt).
    // eps = 9.9999997473787516E-6, i.e. 1e-5 rounded to float32.
    %float9.999990e-06_3664 = torch.constant.float 9.9999997473787516E-6
    %int1600_3665 = torch.constant.int 1600
    %4494 = torch.prim.ListConstruct %int1600_3665 : (!torch.int) -> !torch.list<int>
    // Only %result0_3666 is consumed in the visible lines; the two [1,7,1] results are not.
    %result0_3666, %result1_3667, %result2_3668 = torch.aten.native_layer_norm %4493, %4494, %536, %537, %float9.999990e-06_3664 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 applied to the layer-norm output %result0_3666.
    // %4495 ([-1, 1600]) is not consumed in the visible lines; the reshape uses [7, 1600].
    %4495 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3669 = torch.constant.int 7
    %int1600_3670 = torch.constant.int 1600
    %4496 = torch.prim.ListConstruct %int7_3669, %int1600_3670 : (!torch.int, !torch.int) -> !torch.list<int>
    %4497 = torch.aten.reshape %result0_3666, %4496 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3671 is not referenced in the visible lines; %int1_3672 is the alpha of the add.
    %int0_3671 = torch.constant.int 0
    %int1_3672 = torch.constant.int 1
    // x @ %538 + %539; %538 ([1600,6400]) and %539 ([6400]) are defined above this excerpt.
    %4498 = torch.aten.mm %4497, %538 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4499 = torch.aten.add.Tensor %4498, %539, %int1_3672 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Back to a 3-D [1,7,6400] tensor.
    %4500 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3673 = torch.constant.int 1
    %int7_3674 = torch.constant.int 7
    %int6400_3675 = torch.constant.int 6400
    %4501 = torch.prim.ListConstruct %int1_3673, %int7_3674, %int6400_3675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4502 = torch.aten.reshape %4499, %4501 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-form GELU of x = %4502, spelled out op by op:
    //   %4515 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // where 0.797884583 is approximately sqrt(2 / pi).
    // 0.5 * x
    %4503 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4504 = torch.aten.mul.Tensor %4502, %4503 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %4505 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4506 = torch.aten.pow.Tensor_Tensor %4502, %4505 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %4507 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4508 = torch.aten.mul.Tensor %4506, %4507 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3 (alpha = 1)
    %int1_3676 = torch.constant.int 1
    %4509 = torch.aten.add.Tensor %4502, %4508, %int1_3676 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %4510 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4511 = torch.aten.mul.Tensor %4509, %4510 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4512 = torch.aten.tanh %4511 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by the 0.5 * x term.
    %4513 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3677 = torch.constant.int 1
    %4514 = torch.aten.add.Tensor %4512, %4513, %int1_3677 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %4515 = torch.aten.mul.Tensor %4504, %4514 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 of the activation %4515, then an add with %4493.
    // %4516 ([-1, 6400]) is not consumed in the visible lines; the reshape uses [7, 6400].
    %4516 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3678 = torch.constant.int 7
    %int6400_3679 = torch.constant.int 6400
    %4517 = torch.prim.ListConstruct %int7_3678, %int6400_3679 : (!torch.int, !torch.int) -> !torch.list<int>
    %4518 = torch.aten.reshape %4515, %4517 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3680 is not referenced in the visible lines; %int1_3681 is the alpha of the add.
    %int0_3680 = torch.constant.int 0
    %int1_3681 = torch.constant.int 1
    // x @ %540 + %541; %540 ([6400,1600]) and %541 ([1600]) are defined above this excerpt.
    %4519 = torch.aten.mm %4518, %540 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4520 = torch.aten.add.Tensor %4519, %541, %int1_3681 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the [1,7,1600] shape.
    %4521 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3682 = torch.constant.int 1
    %int7_3683 = torch.constant.int 7
    %int1600_3684 = torch.constant.int 1600
    %4522 = torch.prim.ListConstruct %int1_3682, %int7_3683, %int1600_3684 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4523 = torch.aten.reshape %4520, %4522 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add: %4493 (the tensor that entered the preceding layer norm) + %4523.
    %int1_3685 = torch.constant.int 1
    %4524 = torch.aten.add.Tensor %4493, %4523, %int1_3685 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4524 over the last dim (normalized_shape = [1600]) with
    // weight %542 and bias %543 (both [1600], defined above this excerpt).
    // eps = 9.9999997473787516E-6, i.e. 1e-5 rounded to float32.
    %float9.999990e-06_3686 = torch.constant.float 9.9999997473787516E-6
    %int1600_3687 = torch.constant.int 1600
    %4525 = torch.prim.ListConstruct %int1600_3687 : (!torch.int) -> !torch.list<int>
    // Only %result0_3688 is consumed in the visible lines; the two [1,7,1] results are not.
    %result0_3688, %result1_3689, %result2_3690 = torch.aten.native_layer_norm %4524, %4525, %542, %543, %float9.999990e-06_3686 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused linear projection 1600 -> 4800 of the layer-norm output %result0_3688, then a
    // split into three [1,7,1600] tensors along dim 2.
    // NOTE(review): presumably the combined query/key/value projection -- confirm.
    // %4526 ([-1, 1600]) is not consumed in the visible lines; the reshape uses [7, 1600].
    %4526 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3691 = torch.constant.int 7
    %int1600_3692 = torch.constant.int 1600
    %4527 = torch.prim.ListConstruct %int7_3691, %int1600_3692 : (!torch.int, !torch.int) -> !torch.list<int>
    %4528 = torch.aten.reshape %result0_3688, %4527 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3693 is not referenced in the visible lines; %int1_3694 is the alpha of the add.
    %int0_3693 = torch.constant.int 0
    %int1_3694 = torch.constant.int 1
    // x @ %544 + %545; %544 ([1600,4800]) and %545 ([4800]) are defined above this excerpt.
    %4529 = torch.aten.mm %4528, %544 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4530 = torch.aten.add.Tensor %4529, %545, %int1_3694 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4531 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3695 = torch.constant.int 1
    %int7_3696 = torch.constant.int 7
    %int4800_3697 = torch.constant.int 4800
    %4532 = torch.prim.ListConstruct %int1_3695, %int7_3696, %int4800_3697 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4533 = torch.aten.reshape %4530, %4532 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat tensor literal (three elements, each 1600) converted to
    // an int list; the split is along dim 2 and the list is unpacked into %4537#0..#2.
    %4534 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4535 = torch.prim.tolist(%4534) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3698 = torch.constant.int 2
    %4536 = torch.aten.split_with_sizes %4533, %4535, %int2_3698 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4537:3 = torch.prim.ListUnpack %4536 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Per-head layout for the three split results %4537#0..#2: each [1,7,1600] tensor is
    // reshaped to [1,7,25,64] (1600 = 25 * 64) and dims 1 and 2 are swapped -> [1,25,7,64].
    // The shape tensor literals (%4538, %4542, %4546) are not consumed in the visible lines;
    // the reshapes take their target shape from the int lists.
    // First split result -> %4541.
    %4538 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3699 = torch.constant.int 1
    %int7_3700 = torch.constant.int 7
    %int25_3701 = torch.constant.int 25
    %int64_3702 = torch.constant.int 64
    %4539 = torch.prim.ListConstruct %int1_3699, %int7_3700, %int25_3701, %int64_3702 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4540 = torch.aten.reshape %4537#0, %4539 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3703 = torch.constant.int 1
    %int2_3704 = torch.constant.int 2
    %4541 = torch.aten.transpose.int %4540, %int1_3703, %int2_3704 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second split result -> %4545.
    %4542 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3705 = torch.constant.int 1
    %int7_3706 = torch.constant.int 7
    %int25_3707 = torch.constant.int 25
    %int64_3708 = torch.constant.int 64
    %4543 = torch.prim.ListConstruct %int1_3705, %int7_3706, %int25_3707, %int64_3708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4544 = torch.aten.reshape %4537#1, %4543 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3709 = torch.constant.int 1
    %int2_3710 = torch.constant.int 2
    %4545 = torch.aten.transpose.int %4544, %int1_3709, %int2_3710 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third split result -> %4549.
    %4546 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3711 = torch.constant.int 1
    %int7_3712 = torch.constant.int 7
    %int25_3713 = torch.constant.int 25
    %int64_3714 = torch.constant.int 64
    %4547 = torch.prim.ListConstruct %int1_3711, %int7_3712, %int25_3713, %int64_3714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4548 = torch.aten.reshape %4537#2, %4547 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3715 = torch.constant.int 1
    %int2_3716 = torch.constant.int 2
    %4549 = torch.aten.transpose.int %4548, %int1_3715, %int2_3716 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matrix: %4541 x transpose(%4544-derived tensor), then division by 64^0.5.
    // %4550 repeats the same dim-1/dim-2 transpose of %4544 that already produced %4545.
    %int1_3717 = torch.constant.int 1
    %int2_3718 = torch.constant.int 2
    %4550 = torch.aten.transpose.int %4544, %int1_3717, %int2_3718 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %int2_3719 = torch.constant.int 2
    %int3_3720 = torch.constant.int 3
    %4551 = torch.aten.transpose.int %4550, %int2_3719, %int3_3720 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %4552 = torch.aten.matmul %4541, %4551 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: pow(64.0, 0.5) evaluated as a rank-0 tensor op.
    %4553 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4554 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4555 = torch.aten.pow.Tensor_Tensor %4553, %4554 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4556 and %int0_3721 are not referenced by any op visible in this excerpt.
    %4556 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3721 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0 (empty size list), then 0.0 + 1 * %4555: the sum carries
    // the same value as %4555.
    %4557 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3722 = torch.constant.none
    %float0.000000e00_3723 = torch.constant.float 0.000000e+00
    %4558 = torch.aten.full %4557, %float0.000000e00_3723, %none_3722, %none_3722, %none_3722, %none_3722 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3724 = torch.constant.int 1
    %4559 = torch.aten.add.Tensor %4558, %4555, %int1_3724 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores, [1,25,7,7]; the rank-0 divisor broadcasts.
    %4560 = torch.aten.div.Tensor %4552, %4559 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask select followed by softmax over the last dim.
    // Boolean mask [1,1,7,7]; its contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask -- cannot be confirmed from here.
    %4561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype argument 6; operand and result are both f32, so the element type
    // is unchanged.
    %int6_3725 = torch.constant.int 6
    %none_3726 = torch.constant.none
    %false_3727 = torch.constant.bool false
    %4562 = torch.aten.to.dtype %4560, %int6_3725, %false_3727, %false_3727, %none_3726 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise use -3.40282347E+38 (the most
    // negative finite float32). The mask broadcasts over dim 1.
    %4563 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4564 = torch.aten.where.self %4561, %4562, %4563 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3, no explicit dtype (none).
    %int3_3728 = torch.constant.int 3
    %none_3729 = torch.constant.none
    %4565 = torch.aten.softmax.int %4564, %int3_3728, %none_3729 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; element type again unchanged.
    %int6_3730 = torch.constant.int 6
    %none_3731 = torch.constant.none
    %false_3732 = torch.constant.bool false
    %4566 = torch.aten.to.dtype %4565, %int6_3730, %false_3732, %false_3732, %none_3731 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: softmax output %4566 [1,25,7,7] x %4549 [1,25,7,64] -> [1,25,7,64].
    %4567 = torch.aten.matmul %4566, %4549 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Undo the per-head layout: swap dims 1 and 2 back, then flatten 25 x 64 into 1600.
    %int1_3733 = torch.constant.int 1
    %int2_3734 = torch.constant.int 2
    %4568 = torch.aten.transpose.int %4567, %int1_3733, %int2_3734 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // %4569 is not consumed in the visible lines; the reshape uses the int list %4570.
    %4569 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3735 = torch.constant.int 1
    %int7_3736 = torch.constant.int 7
    %int1600_3737 = torch.constant.int 1600
    %4570 = torch.prim.ListConstruct %int1_3735, %int7_3736, %int1600_3737 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4571 = torch.aten.reshape %4568, %4570 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Linear projection 1600 -> 1600 of the merged-head tensor %4571, then a residual add.
    // %4572 ([-1, 1600]) is not consumed in the visible lines; the reshape uses [7, 1600].
    %4572 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3738 = torch.constant.int 7
    %int1600_3739 = torch.constant.int 1600
    %4573 = torch.prim.ListConstruct %int7_3738, %int1600_3739 : (!torch.int, !torch.int) -> !torch.list<int>
    // Drop the leading size-1 dim so a 2-D mm can be used.
    %4574 = torch.aten.reshape %4571, %4573 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3740 is not referenced in the visible lines; %int1_3741 is the alpha of the add.
    %int0_3740 = torch.constant.int 0
    %int1_3741 = torch.constant.int 1
    // x @ %546 + %547; %546 ([1600,1600]) and %547 ([1600]) are defined above this excerpt.
    %4575 = torch.aten.mm %4574, %546 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4576 = torch.aten.add.Tensor %4575, %547, %int1_3741 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the [1,7,1600] shape.
    %4577 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3742 = torch.constant.int 1
    %int7_3743 = torch.constant.int 7
    %int1600_3744 = torch.constant.int 1600
    %4578 = torch.prim.ListConstruct %int1_3742, %int7_3743, %int1600_3744 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4579 = torch.aten.reshape %4576, %4578 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add (alpha = 1): projection output + %4524, the tensor that entered the
    // layer norm feeding this sub-layer.
    %int1_3745 = torch.constant.int 1
    %4580 = torch.aten.add.Tensor %4579, %4524, %int1_3745 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4580 over the last dim (normalized_shape = [1600]) with
    // weight %548 and bias %549 (both [1600], defined above this excerpt).
    // eps = 9.9999997473787516E-6, i.e. 1e-5 rounded to float32.
    %float9.999990e-06_3746 = torch.constant.float 9.9999997473787516E-6
    %int1600_3747 = torch.constant.int 1600
    %4581 = torch.prim.ListConstruct %int1600_3747 : (!torch.int) -> !torch.list<int>
    // Only %result0_3748 is consumed in the visible lines; the two [1,7,1] results are not.
    %result0_3748, %result1_3749, %result2_3750 = torch.aten.native_layer_norm %4580, %4581, %548, %549, %float9.999990e-06_3746 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Linear projection 1600 -> 6400 applied to the layer-norm output %result0_3748.
    // %4582 ([-1, 1600]) is not consumed in the visible lines; the reshape uses [7, 1600].
    %4582 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3751 = torch.constant.int 7
    %int1600_3752 = torch.constant.int 1600
    %4583 = torch.prim.ListConstruct %int7_3751, %int1600_3752 : (!torch.int, !torch.int) -> !torch.list<int>
    %4584 = torch.aten.reshape %result0_3748, %4583 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3753 is not referenced in the visible lines; %int1_3754 is the alpha of the add.
    %int0_3753 = torch.constant.int 0
    %int1_3754 = torch.constant.int 1
    // x @ %550 + %551; %550 ([1600,6400]) and %551 ([6400]) are defined above this excerpt.
    %4585 = torch.aten.mm %4584, %550 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4586 = torch.aten.add.Tensor %4585, %551, %int1_3754 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // Back to a 3-D [1,7,6400] tensor.
    %4587 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3755 = torch.constant.int 1
    %int7_3756 = torch.constant.int 7
    %int6400_3757 = torch.constant.int 6400
    %4588 = torch.prim.ListConstruct %int1_3755, %int7_3756, %int6400_3757 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4589 = torch.aten.reshape %4586, %4588 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // Tanh-form GELU of x = %4589, spelled out op by op:
    //   %4602 = 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3)))
    // where 0.797884583 is approximately sqrt(2 / pi).
    // 0.5 * x
    %4590 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4591 = torch.aten.mul.Tensor %4589, %4590 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x^3
    %4592 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4593 = torch.aten.pow.Tensor_Tensor %4589, %4592 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 0.044715 * x^3
    %4594 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4595 = torch.aten.mul.Tensor %4593, %4594 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // x + 0.044715 * x^3 (alpha = 1)
    %int1_3758 = torch.constant.int 1
    %4596 = torch.aten.add.Tensor %4589, %4595, %int1_3758 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // tanh(0.797884583 * (...))
    %4597 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4598 = torch.aten.mul.Tensor %4596, %4597 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4599 = torch.aten.tanh %4598 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // 1 + tanh(...), then multiply by the 0.5 * x term.
    %4600 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3759 = torch.constant.int 1
    %4601 = torch.aten.add.Tensor %4599, %4600, %int1_3759 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    %4602 = torch.aten.mul.Tensor %4591, %4601 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // Linear projection 6400 -> 1600 of the activation %4602, then an add with %4580.
    // %4603 ([-1, 6400]) is not consumed in the visible lines; the reshape uses [7, 6400].
    %4603 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3760 = torch.constant.int 7
    %int6400_3761 = torch.constant.int 6400
    %4604 = torch.prim.ListConstruct %int7_3760, %int6400_3761 : (!torch.int, !torch.int) -> !torch.list<int>
    %4605 = torch.aten.reshape %4602, %4604 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3762 is not referenced in the visible lines; %int1_3763 is the alpha of the add.
    %int0_3762 = torch.constant.int 0
    %int1_3763 = torch.constant.int 1
    // x @ %552 + %553; %552 ([6400,1600]) and %553 ([1600]) are defined above this excerpt.
    %4606 = torch.aten.mm %4605, %552 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4607 = torch.aten.add.Tensor %4606, %553, %int1_3763 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // Restore the [1,7,1600] shape.
    %4608 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3764 = torch.constant.int 1
    %int7_3765 = torch.constant.int 7
    %int1600_3766 = torch.constant.int 1600
    %4609 = torch.prim.ListConstruct %int1_3764, %int7_3765, %int1600_3766 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4610 = torch.aten.reshape %4607, %4609 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // Residual add: %4580 (the tensor that entered the preceding layer norm) + %4610.
    %int1_3767 = torch.constant.int 1
    %4611 = torch.aten.add.Tensor %4580, %4610, %int1_3767 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // Layer normalization of %4611 over the last dim (normalized_shape = [1600]) with
    // weight %554 and bias %555 (both [1600], defined above this excerpt).
    // eps = 9.9999997473787516E-6, i.e. 1e-5 rounded to float32.
    %float9.999990e-06_3768 = torch.constant.float 9.9999997473787516E-6
    %int1600_3769 = torch.constant.int 1600
    %4612 = torch.prim.ListConstruct %int1600_3769 : (!torch.int) -> !torch.list<int>
    // Only %result0_3770 is consumed in the visible lines; the two [1,7,1] results are not.
    %result0_3770, %result1_3771, %result2_3772 = torch.aten.native_layer_norm %4611, %4612, %554, %555, %float9.999990e-06_3768 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // Fused linear projection 1600 -> 4800 of the layer-norm output %result0_3770, then a
    // split into three [1,7,1600] tensors along dim 2.
    // NOTE(review): presumably the combined query/key/value projection -- confirm.
    // %4613 ([-1, 1600]) is not consumed in the visible lines; the reshape uses [7, 1600].
    %4613 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3773 = torch.constant.int 7
    %int1600_3774 = torch.constant.int 1600
    %4614 = torch.prim.ListConstruct %int7_3773, %int1600_3774 : (!torch.int, !torch.int) -> !torch.list<int>
    %4615 = torch.aten.reshape %result0_3770, %4614 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3775 is not referenced in the visible lines; %int1_3776 is the alpha of the add.
    %int0_3775 = torch.constant.int 0
    %int1_3776 = torch.constant.int 1
    // x @ %556 + %557; %556 ([1600,4800]) and %557 ([4800]) are defined above this excerpt.
    %4616 = torch.aten.mm %4615, %556 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4617 = torch.aten.add.Tensor %4616, %557, %int1_3776 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    %4618 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3777 = torch.constant.int 1
    %int7_3778 = torch.constant.int 7
    %int4800_3779 = torch.constant.int 4800
    %4619 = torch.prim.ListConstruct %int1_3777, %int7_3778, %int4800_3779 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4620 = torch.aten.reshape %4617, %4619 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // Split sizes come from a splat tensor literal (three elements, each 1600) converted to
    // an int list; the split is along dim 2 and the list is unpacked into %4624#0..#2.
    %4621 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4622 = torch.prim.tolist(%4621) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3780 = torch.constant.int 2
    %4623 = torch.aten.split_with_sizes %4620, %4622, %int2_3780 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    %4624:3 = torch.prim.ListUnpack %4623 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // Per-head layout for the three split results %4624#0..#2: each [1,7,1600] tensor is
    // reshaped to [1,7,25,64] (1600 = 25 * 64) and dims 1 and 2 are swapped -> [1,25,7,64].
    // The shape tensor literals (%4625, %4629, %4633) are not consumed in the visible lines;
    // the reshapes take their target shape from the int lists.
    // First split result -> %4628.
    %4625 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3781 = torch.constant.int 1
    %int7_3782 = torch.constant.int 7
    %int25_3783 = torch.constant.int 25
    %int64_3784 = torch.constant.int 64
    %4626 = torch.prim.ListConstruct %int1_3781, %int7_3782, %int25_3783, %int64_3784 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4627 = torch.aten.reshape %4624#0, %4626 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3785 = torch.constant.int 1
    %int2_3786 = torch.constant.int 2
    %4628 = torch.aten.transpose.int %4627, %int1_3785, %int2_3786 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Second split result -> %4632.
    %4629 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3787 = torch.constant.int 1
    %int7_3788 = torch.constant.int 7
    %int25_3789 = torch.constant.int 25
    %int64_3790 = torch.constant.int 64
    %4630 = torch.prim.ListConstruct %int1_3787, %int7_3788, %int25_3789, %int64_3790 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4631 = torch.aten.reshape %4624#1, %4630 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3791 = torch.constant.int 1
    %int2_3792 = torch.constant.int 2
    %4632 = torch.aten.transpose.int %4631, %int1_3791, %int2_3792 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Third split result -> %4636.
    %4633 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3793 = torch.constant.int 1
    %int7_3794 = torch.constant.int 7
    %int25_3795 = torch.constant.int 25
    %int64_3796 = torch.constant.int 64
    %4634 = torch.prim.ListConstruct %int1_3793, %int7_3794, %int25_3795, %int64_3796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4635 = torch.aten.reshape %4624#2, %4634 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3797 = torch.constant.int 1
    %int2_3798 = torch.constant.int 2
    %4636 = torch.aten.transpose.int %4635, %int1_3797, %int2_3798 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Score matrix: %4628 x transpose(%4631-derived tensor), then division by 64^0.5.
    // %4637 repeats the same dim-1/dim-2 transpose of %4631 that already produced %4632.
    %int1_3799 = torch.constant.int 1
    %int2_3800 = torch.constant.int 2
    %4637 = torch.aten.transpose.int %4631, %int1_3799, %int2_3800 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Swap the last two dims so the matmul contracts over the size-64 axis.
    %int2_3801 = torch.constant.int 2
    %int3_3802 = torch.constant.int 3
    %4638 = torch.aten.transpose.int %4637, %int2_3801, %int3_3802 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    %4639 = torch.aten.matmul %4628, %4638 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Divisor: pow(64.0, 0.5) evaluated as a rank-0 tensor op.
    %4640 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4641 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4642 = torch.aten.pow.Tensor_Tensor %4640, %4641 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4643 and %int0_3803 are not referenced by any op visible in this excerpt.
    %4643 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3803 = torch.constant.int 0
    // Rank-0 tensor filled with 0.0 (empty size list), then 0.0 + 1 * %4642: the sum carries
    // the same value as %4642.
    %4644 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3804 = torch.constant.none
    %float0.000000e00_3805 = torch.constant.float 0.000000e+00
    %4645 = torch.aten.full %4644, %float0.000000e00_3805, %none_3804, %none_3804, %none_3804, %none_3804 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3806 = torch.constant.int 1
    %4646 = torch.aten.add.Tensor %4645, %4642, %int1_3806 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Scaled scores, [1,25,7,7]; the rank-0 divisor broadcasts.
    %4647 = torch.aten.div.Tensor %4639, %4646 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Mask select followed by softmax over the last dim.
    // Boolean mask [1,1,7,7]; its contents are elided in this dump.
    // NOTE(review): presumably a causal (lower-triangular) mask -- cannot be confirmed from here.
    %4648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype argument 6; operand and result are both f32, so the element type
    // is unchanged.
    %int6_3807 = torch.constant.int 6
    %none_3808 = torch.constant.none
    %false_3809 = torch.constant.bool false
    %4649 = torch.aten.to.dtype %4647, %int6_3807, %false_3809, %false_3809, %none_3808 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the score, otherwise use -3.40282347E+38 (the most
    // negative finite float32). The mask broadcasts over dim 1.
    %4650 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4651 = torch.aten.where.self %4648, %4649, %4650 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3, no explicit dtype (none).
    %int3_3810 = torch.constant.int 3
    %none_3811 = torch.constant.none
    %4652 = torch.aten.softmax.int %4651, %int3_3810, %none_3811 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second f32 -> f32 to.dtype; element type again unchanged.
    %int6_3812 = torch.constant.int 6
    %none_3813 = torch.constant.none
    %false_3814 = torch.constant.bool false
    %4653 = torch.aten.to.dtype %4652, %int6_3812, %false_3814, %false_3814, %none_3813 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    %4654 = torch.aten.matmul %4653, %4636 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    %int1_3815 = torch.constant.int 1
    %int2_3816 = torch.constant.int 2
    %4655 = torch.aten.transpose.int %4654, %int1_3815, %int2_3816 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // ---- Collapse the trailing [25,64] axes, apply a 1600x1600 linear map, add residual ----
    // %4656 is not referenced by any later op; the reshape takes its target shape from the list %4657.
    %4656 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3817 = torch.constant.int 1
    %int7_3818 = torch.constant.int 7
    %int1600_3819 = torch.constant.int 1600
    %4657 = torch.prim.ListConstruct %int1_3817, %int7_3818, %int1600_3819 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [1,7,25,64] -> [1,7,1600]
    %4658 = torch.aten.reshape %4655, %4657 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %4659 (holding [-1, 1600]) is not referenced later; the reshape below uses the static list [7, 1600].
    %4659 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3820 = torch.constant.int 7
    %int1600_3821 = torch.constant.int 1600
    %4660 = torch.prim.ListConstruct %int7_3820, %int1600_3821 : (!torch.int, !torch.int) -> !torch.list<int>
    // [1,7,1600] -> [7,1600] so the 2-D aten.mm can be used.
    %4661 = torch.aten.reshape %4658, %4660 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3822 is not referenced later; %int1_3823 is the alpha operand of the add below.
    %int0_3822 = torch.constant.int 0
    %int1_3823 = torch.constant.int 1
    // %4661 x %558 + %559. %558 ([1600,1600]) and %559 ([1600]) are defined outside this excerpt.
    %4662 = torch.aten.mm %4661, %558 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4663 = torch.aten.add.Tensor %4662, %559, %int1_3823 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %4664 is not referenced later; the reshape uses the list %4665.
    %4664 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3824 = torch.constant.int 1
    %int7_3825 = torch.constant.int 7
    %int1600_3826 = torch.constant.int 1600
    %4665 = torch.prim.ListConstruct %int1_3824, %int7_3825, %int1600_3826 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [7,1600] -> [1,7,1600]
    %4666 = torch.aten.reshape %4663, %4665 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3827 = torch.constant.int 1
    // Element-wise add with %4611 (defined outside this excerpt).
    // NOTE(review): presumably the residual/skip connection around the attention sub-layer — confirm against %4611's producer.
    %4667 = torch.aten.add.Tensor %4666, %4611, %int1_3827 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm, 1600 -> 6400 -> 1600 feed-forward with tanh-based activation, skip add ----
    // Layer norm of %4667 over the trailing axis of size 1600, weight %560, bias %561
    // (both defined outside this excerpt), eps = 9.9999997473787516E-6.
    // Only %result0_3830 is used; %result1_3831 and %result2_3832 are not referenced later.
    %float9.999990e-06_3828 = torch.constant.float 9.9999997473787516E-6
    %int1600_3829 = torch.constant.int 1600
    %4668 = torch.prim.ListConstruct %int1600_3829 : (!torch.int) -> !torch.list<int>
    %result0_3830, %result1_3831, %result2_3832 = torch.aten.native_layer_norm %4667, %4668, %560, %561, %float9.999990e-06_3828 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %4669 is not referenced later; the reshape uses the static list [7, 1600].
    %4669 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3833 = torch.constant.int 7
    %int1600_3834 = torch.constant.int 1600
    %4670 = torch.prim.ListConstruct %int7_3833, %int1600_3834 : (!torch.int, !torch.int) -> !torch.list<int>
    // [1,7,1600] -> [7,1600] for aten.mm.
    %4671 = torch.aten.reshape %result0_3830, %4670 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3835 is not referenced later; %int1_3836 is the alpha operand of the bias add.
    %int0_3835 = torch.constant.int 0
    %int1_3836 = torch.constant.int 1
    // First linear map: %4671 x %562 ([1600,6400]) + %563 ([6400]); both weights defined outside this excerpt.
    %4672 = torch.aten.mm %4671, %562 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4673 = torch.aten.add.Tensor %4672, %563, %int1_3836 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %4674 is not referenced later; the reshape uses the list %4675.
    %4674 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3837 = torch.constant.int 1
    %int7_3838 = torch.constant.int 7
    %int6400_3839 = torch.constant.int 6400
    %4675 = torch.prim.ListConstruct %int1_3837, %int7_3838, %int6400_3839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // x = %4676, shape [1,7,6400].
    %4676 = torch.aten.reshape %4673, %4675 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // The ops below compute 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3))),
    // which is the tanh form of the GELU activation.
    // %4678 = 0.5 * x
    %4677 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4678 = torch.aten.mul.Tensor %4676, %4677 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4680 = x ** 3
    %4679 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4680 = torch.aten.pow.Tensor_Tensor %4676, %4679 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4682 = 0.044715 * x^3
    %4681 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4682 = torch.aten.mul.Tensor %4680, %4681 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3840 = torch.constant.int 1
    // %4683 = x + 0.044715 * x^3
    %4683 = torch.aten.add.Tensor %4676, %4682, %int1_3840 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %4685 = 0.797884583 * %4683
    %4684 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4685 = torch.aten.mul.Tensor %4683, %4684 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4686 = torch.aten.tanh %4685 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4688 = tanh(...) + 1.0
    %4687 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3841 = torch.constant.int 1
    %4688 = torch.aten.add.Tensor %4686, %4687, %int1_3841 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %4689 = (0.5 * x) * (1 + tanh(...))
    %4689 = torch.aten.mul.Tensor %4678, %4688 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4690 is not referenced later; the reshape uses the static list [7, 6400].
    %4690 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3842 = torch.constant.int 7
    %int6400_3843 = torch.constant.int 6400
    %4691 = torch.prim.ListConstruct %int7_3842, %int6400_3843 : (!torch.int, !torch.int) -> !torch.list<int>
    %4692 = torch.aten.reshape %4689, %4691 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3844 is not referenced later; %int1_3845 is the alpha operand of the bias add.
    %int0_3844 = torch.constant.int 0
    %int1_3845 = torch.constant.int 1
    // Second linear map: %4692 x %564 ([6400,1600]) + %565 ([1600]); both weights defined outside this excerpt.
    %4693 = torch.aten.mm %4692, %564 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4694 = torch.aten.add.Tensor %4693, %565, %int1_3845 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %4695 is not referenced later; the reshape uses the list %4696.
    %4695 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3846 = torch.constant.int 1
    %int7_3847 = torch.constant.int 7
    %int1600_3848 = torch.constant.int 1600
    %4696 = torch.prim.ListConstruct %int1_3846, %int7_3847, %int1600_3848 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4697 = torch.aten.reshape %4694, %4696 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3849 = torch.constant.int 1
    // Skip connection: add the feed-forward output to %4667, the tensor that entered the layer norm above.
    %4698 = torch.aten.add.Tensor %4667, %4697, %int1_3849 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm, 1600 -> 4800 linear map, split into three [1,7,1600] chunks ----
    // Layer norm of %4698 over the trailing axis of size 1600, weight %566, bias %567
    // (both defined outside this excerpt), eps = 9.9999997473787516E-6.
    // Only %result0_3852 is used; %result1_3853 and %result2_3854 are not referenced later.
    %float9.999990e-06_3850 = torch.constant.float 9.9999997473787516E-6
    %int1600_3851 = torch.constant.int 1600
    %4699 = torch.prim.ListConstruct %int1600_3851 : (!torch.int) -> !torch.list<int>
    %result0_3852, %result1_3853, %result2_3854 = torch.aten.native_layer_norm %4698, %4699, %566, %567, %float9.999990e-06_3850 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %4700 is not referenced later; the reshape uses the static list [7, 1600].
    %4700 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3855 = torch.constant.int 7
    %int1600_3856 = torch.constant.int 1600
    %4701 = torch.prim.ListConstruct %int7_3855, %int1600_3856 : (!torch.int, !torch.int) -> !torch.list<int>
    // [1,7,1600] -> [7,1600] for aten.mm.
    %4702 = torch.aten.reshape %result0_3852, %4701 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3857 is not referenced later; %int1_3858 is the alpha operand of the bias add.
    %int0_3857 = torch.constant.int 0
    %int1_3858 = torch.constant.int 1
    // %4702 x %568 ([1600,4800]) + %569 ([4800]); both weights defined outside this excerpt.
    %4703 = torch.aten.mm %4702, %568 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,4800],f32> -> !torch.vtensor<[7,4800],f32>
    %4704 = torch.aten.add.Tensor %4703, %569, %int1_3858 : !torch.vtensor<[7,4800],f32>, !torch.vtensor<[4800],f32>, !torch.int -> !torch.vtensor<[7,4800],f32>
    // %4705 is not referenced later; the reshape uses the list %4706.
    %4705 = torch.vtensor.literal(dense<[1, 7, 4800]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3859 = torch.constant.int 1
    %int7_3860 = torch.constant.int 7
    %int4800_3861 = torch.constant.int 4800
    %4706 = torch.prim.ListConstruct %int1_3859, %int7_3860, %int4800_3861 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4707 = torch.aten.reshape %4704, %4706 : !torch.vtensor<[7,4800],f32>, !torch.list<int> -> !torch.vtensor<[1,7,4800],f32>
    // %4708 is a splat literal: three si64 elements, each 1600. Converted to a list, it gives
    // the split sizes [1600, 1600, 1600] applied along dim 2 of %4707.
    %4708 = torch.vtensor.literal(dense<1600> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %4709 = torch.prim.tolist(%4708) : !torch.vtensor<[3],si64> -> !torch.list<int>
    %int2_3862 = torch.constant.int 2
    %4710 = torch.aten.split_with_sizes %4707, %4709, %int2_3862 : !torch.vtensor<[1,7,4800],f32>, !torch.list<int>, !torch.int -> !torch.list<vtensor<[1,7,1600],f32>>
    // %4711#0, %4711#1, %4711#2: the three [1,7,1600] chunks.
    // NOTE(review): presumably query / key / value in that order (GPT-2 style fused projection) — confirm against the source model.
    %4711:3 = torch.prim.ListUnpack %4710 : !torch.list<vtensor<[1,7,1600],f32>> -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>
    // ---- Reshape each split chunk to [1,7,25,64], then swap dims 1 and 2 -> [1,25,7,64] ----
    // The shape literals %4712, %4716 and %4720 are not referenced later; each reshape
    // takes its target shape from the adjacent ListConstruct.
    // Chunk #0 -> %4715 (left operand of the score matmul that follows).
    %4712 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3863 = torch.constant.int 1
    %int7_3864 = torch.constant.int 7
    %int25_3865 = torch.constant.int 25
    %int64_3866 = torch.constant.int 64
    %4713 = torch.prim.ListConstruct %int1_3863, %int7_3864, %int25_3865, %int64_3866 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4714 = torch.aten.reshape %4711#0, %4713 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3867 = torch.constant.int 1
    %int2_3868 = torch.constant.int 2
    %4715 = torch.aten.transpose.int %4714, %int1_3867, %int2_3868 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #1 -> %4718 / %4719. %4719 appears in the function's final `return`.
    %4716 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3869 = torch.constant.int 1
    %int7_3870 = torch.constant.int 7
    %int25_3871 = torch.constant.int 25
    %int64_3872 = torch.constant.int 64
    %4717 = torch.prim.ListConstruct %int1_3869, %int7_3870, %int25_3871, %int64_3872 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4718 = torch.aten.reshape %4711#1, %4717 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3873 = torch.constant.int 1
    %int2_3874 = torch.constant.int 2
    %4719 = torch.aten.transpose.int %4718, %int1_3873, %int2_3874 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // Chunk #2 -> %4723. %4723 appears in the function's final `return` and is the
    // right operand of the second attention matmul.
    %4720 = torch.vtensor.literal(dense<[1, 7, 25, 64]> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
    %int1_3875 = torch.constant.int 1
    %int7_3876 = torch.constant.int 7
    %int25_3877 = torch.constant.int 25
    %int64_3878 = torch.constant.int 64
    %4721 = torch.prim.ListConstruct %int1_3875, %int7_3876, %int25_3877, %int64_3878 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4722 = torch.aten.reshape %4711#2, %4721 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,25,64],f32>
    %int1_3879 = torch.constant.int 1
    %int2_3880 = torch.constant.int 2
    %4723 = torch.aten.transpose.int %4722, %int1_3879, %int2_3880 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    // ---- Attention core: scores = softmax(mask((%4715 x %4718^T) / sqrt(64))), output = scores x %4723 ----
    // %4724 repeats the dims-1<->2 transpose of %4718 that already produced %4719;
    // %4725 then swaps dims 2 and 3 to get [1,25,64,7].
    %int1_3881 = torch.constant.int 1
    %int2_3882 = torch.constant.int 2
    %4724 = torch.aten.transpose.int %4718, %int1_3881, %int2_3882 : !torch.vtensor<[1,7,25,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,7,64],f32>
    %int2_3883 = torch.constant.int 2
    %int3_3884 = torch.constant.int 3
    %4725 = torch.aten.transpose.int %4724, %int2_3883, %int3_3884 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,25,64,7],f32>
    // Raw scores: [1,25,7,64] x [1,25,64,7] -> [1,25,7,7].
    %4726 = torch.aten.matmul %4715, %4725 : !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,64,7],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Scale factor: %4729 = 64.0 ** 0.5, built from two scalar literals.
    %4727 = torch.vtensor.literal(dense<6.400000e+01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4728 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4729 = torch.aten.pow.Tensor_Tensor %4727, %4728 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
    // %4730 and %int0_3885 are not referenced by any later op in this function.
    %4730 = torch.vtensor.literal(dense<> : tensor<0xsi64>) : !torch.vtensor<[0],si64>
    %int0_3885 = torch.constant.int 0
    // %4732: rank-0 tensor filled with 0.0 (empty size list); %4733 = %4732 + 1 * %4729,
    // i.e. the same scalar value as %4729.
    %4731 = torch.prim.ListConstruct  : () -> !torch.list<int>
    %none_3886 = torch.constant.none
    %float0.000000e00_3887 = torch.constant.float 0.000000e+00
    %4732 = torch.aten.full %4731, %float0.000000e00_3887, %none_3886, %none_3886, %none_3886, %none_3886 : !torch.list<int>, !torch.float, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[],f32>
    %int1_3888 = torch.constant.int 1
    %4733 = torch.aten.add.Tensor %4732, %4729, %int1_3888 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
    // Divide the scores by the scalar (broadcast over [1,25,7,7]).
    %4734 = torch.aten.div.Tensor %4726, %4733 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Boolean [1,1,7,7] mask whose contents are elided in this dump.
    // NOTE(review): presumably the causal (lower-triangular) mask — cannot be verified from the elided literal.
    %4735 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x1x7x7xi1>) : !torch.vtensor<[1,1,7,7],i1>
    // to.dtype with dtype code 6; operand and result types are both f32.
    %int6_3889 = torch.constant.int 6
    %none_3890 = torch.constant.none
    %false_3891 = torch.constant.bool false
    %4736 = torch.aten.to.dtype %4734, %int6_3889, %false_3891, %false_3891, %none_3890 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Where the mask is true keep the scaled score, otherwise use -3.40282347E+38.
    %4737 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
    %4738 = torch.aten.where.self %4735, %4736, %4737 : !torch.vtensor<[1,1,7,7],i1>, !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,25,7,7],f32>
    // Softmax along dim 3 (the last axis), no dtype override.
    %int3_3892 = torch.constant.int 3
    %none_3893 = torch.constant.none
    %4739 = torch.aten.softmax.int %4738, %int3_3892, %none_3893 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Second to.dtype with dtype code 6; again f32 -> f32 per the op's types.
    %int6_3894 = torch.constant.int 6
    %none_3895 = torch.constant.none
    %false_3896 = torch.constant.bool false
    %4740 = torch.aten.to.dtype %4739, %int6_3894, %false_3896, %false_3896, %none_3895 : !torch.vtensor<[1,25,7,7],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,25,7,7],f32>
    // Weighted sum: [1,25,7,7] x [1,25,7,64] (%4723) -> [1,25,7,64].
    %4741 = torch.aten.matmul %4740, %4723 : !torch.vtensor<[1,25,7,7],f32>, !torch.vtensor<[1,25,7,64],f32> -> !torch.vtensor<[1,25,7,64],f32>
    // Swap dims 1 and 2 back: [1,25,7,64] -> [1,7,25,64].
    %int1_3897 = torch.constant.int 1
    %int2_3898 = torch.constant.int 2
    %4742 = torch.aten.transpose.int %4741, %int1_3897, %int2_3898 : !torch.vtensor<[1,25,7,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,7,25,64],f32>
    // ---- Collapse the trailing [25,64] axes, apply a 1600x1600 linear map, add skip connection ----
    // %4743 is not referenced by any later op; the reshape takes its target shape from the list %4744.
    %4743 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3899 = torch.constant.int 1
    %int7_3900 = torch.constant.int 7
    %int1600_3901 = torch.constant.int 1600
    %4744 = torch.prim.ListConstruct %int1_3899, %int7_3900, %int1600_3901 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [1,7,25,64] -> [1,7,1600]
    %4745 = torch.aten.reshape %4742, %4744 : !torch.vtensor<[1,7,25,64],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // %4746 (holding [-1, 1600]) is not referenced later; the reshape below uses the static list [7, 1600].
    %4746 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3902 = torch.constant.int 7
    %int1600_3903 = torch.constant.int 1600
    %4747 = torch.prim.ListConstruct %int7_3902, %int1600_3903 : (!torch.int, !torch.int) -> !torch.list<int>
    // [1,7,1600] -> [7,1600] so the 2-D aten.mm can be used.
    %4748 = torch.aten.reshape %4745, %4747 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3904 is not referenced later; %int1_3905 is the alpha operand of the add below.
    %int0_3904 = torch.constant.int 0
    %int1_3905 = torch.constant.int 1
    // %4748 x %570 + %571. %570 ([1600,1600]) and %571 ([1600]) are defined outside this excerpt.
    %4749 = torch.aten.mm %4748, %570 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4750 = torch.aten.add.Tensor %4749, %571, %int1_3905 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %4751 is not referenced later; the reshape uses the list %4752.
    %4751 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3906 = torch.constant.int 1
    %int7_3907 = torch.constant.int 7
    %int1600_3908 = torch.constant.int 1600
    %4752 = torch.prim.ListConstruct %int1_3906, %int7_3907, %int1600_3908 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // [7,1600] -> [1,7,1600]
    %4753 = torch.aten.reshape %4750, %4752 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3909 = torch.constant.int 1
    // Skip connection: add %4698, the tensor that was fed to the layer norm preceding this attention block.
    %4754 = torch.aten.add.Tensor %4753, %4698, %int1_3909 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Layer norm, 1600 -> 6400 -> 1600 feed-forward with tanh-based activation, skip add ----
    // Layer norm of %4754 over the trailing axis of size 1600, weight %572, bias %573
    // (both defined outside this excerpt), eps = 9.9999997473787516E-6.
    // Only %result0_3912 is used; %result1_3913 and %result2_3914 are not referenced later.
    %float9.999990e-06_3910 = torch.constant.float 9.9999997473787516E-6
    %int1600_3911 = torch.constant.int 1600
    %4755 = torch.prim.ListConstruct %int1600_3911 : (!torch.int) -> !torch.list<int>
    %result0_3912, %result1_3913, %result2_3914 = torch.aten.native_layer_norm %4754, %4755, %572, %573, %float9.999990e-06_3910 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %4756 is not referenced later; the reshape uses the static list [7, 1600].
    %4756 = torch.vtensor.literal(dense<[-1, 1600]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3915 = torch.constant.int 7
    %int1600_3916 = torch.constant.int 1600
    %4757 = torch.prim.ListConstruct %int7_3915, %int1600_3916 : (!torch.int, !torch.int) -> !torch.list<int>
    // [1,7,1600] -> [7,1600] for aten.mm.
    %4758 = torch.aten.reshape %result0_3912, %4757 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[7,1600],f32>
    // %int0_3917 is not referenced later; %int1_3918 is the alpha operand of the bias add.
    %int0_3917 = torch.constant.int 0
    %int1_3918 = torch.constant.int 1
    // First linear map: %4758 x %574 ([1600,6400]) + %575 ([6400]); both weights defined outside this excerpt.
    %4759 = torch.aten.mm %4758, %574 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600,6400],f32> -> !torch.vtensor<[7,6400],f32>
    %4760 = torch.aten.add.Tensor %4759, %575, %int1_3918 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400],f32>, !torch.int -> !torch.vtensor<[7,6400],f32>
    // %4761 is not referenced later; the reshape uses the list %4762.
    %4761 = torch.vtensor.literal(dense<[1, 7, 6400]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3919 = torch.constant.int 1
    %int7_3920 = torch.constant.int 7
    %int6400_3921 = torch.constant.int 6400
    %4762 = torch.prim.ListConstruct %int1_3919, %int7_3920, %int6400_3921 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // x = %4763, shape [1,7,6400].
    %4763 = torch.aten.reshape %4760, %4762 : !torch.vtensor<[7,6400],f32>, !torch.list<int> -> !torch.vtensor<[1,7,6400],f32>
    // The ops below compute 0.5 * x * (1 + tanh(0.797884583 * (x + 0.044715 * x^3))),
    // which is the tanh form of the GELU activation.
    // %4765 = 0.5 * x
    %4764 = torch.vtensor.literal(dense<5.000000e-01> : tensor<f32>) : !torch.vtensor<[],f32>
    %4765 = torch.aten.mul.Tensor %4763, %4764 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4767 = x ** 3
    %4766 = torch.vtensor.literal(dense<3.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %4767 = torch.aten.pow.Tensor_Tensor %4763, %4766 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4769 = 0.044715 * x^3
    %4768 = torch.vtensor.literal(dense<4.471500e-02> : tensor<f32>) : !torch.vtensor<[],f32>
    %4769 = torch.aten.mul.Tensor %4767, %4768 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %int1_3922 = torch.constant.int 1
    // %4770 = x + 0.044715 * x^3
    %4770 = torch.aten.add.Tensor %4763, %4769, %int1_3922 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %4772 = 0.797884583 * %4770
    %4771 = torch.vtensor.literal(dense<0.797884583> : tensor<f32>) : !torch.vtensor<[],f32>
    %4772 = torch.aten.mul.Tensor %4770, %4771 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,7,6400],f32>
    %4773 = torch.aten.tanh %4772 : !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4775 = tanh(...) + 1.0
    %4774 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32>
    %int1_3923 = torch.constant.int 1
    %4775 = torch.aten.add.Tensor %4773, %4774, %int1_3923 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1,7,6400],f32>
    // %4776 = (0.5 * x) * (1 + tanh(...))
    %4776 = torch.aten.mul.Tensor %4765, %4775 : !torch.vtensor<[1,7,6400],f32>, !torch.vtensor<[1,7,6400],f32> -> !torch.vtensor<[1,7,6400],f32>
    // %4777 is not referenced later; the reshape uses the static list [7, 6400].
    %4777 = torch.vtensor.literal(dense<[-1, 6400]> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
    %int7_3924 = torch.constant.int 7
    %int6400_3925 = torch.constant.int 6400
    %4778 = torch.prim.ListConstruct %int7_3924, %int6400_3925 : (!torch.int, !torch.int) -> !torch.list<int>
    %4779 = torch.aten.reshape %4776, %4778 : !torch.vtensor<[1,7,6400],f32>, !torch.list<int> -> !torch.vtensor<[7,6400],f32>
    // %int0_3926 is not referenced later; %int1_3927 is the alpha operand of the bias add.
    %int0_3926 = torch.constant.int 0
    %int1_3927 = torch.constant.int 1
    // Second linear map: %4779 x %576 ([6400,1600]) + %577 ([1600]); both weights defined outside this excerpt.
    %4780 = torch.aten.mm %4779, %576 : !torch.vtensor<[7,6400],f32>, !torch.vtensor<[6400,1600],f32> -> !torch.vtensor<[7,1600],f32>
    %4781 = torch.aten.add.Tensor %4780, %577, %int1_3927 : !torch.vtensor<[7,1600],f32>, !torch.vtensor<[1600],f32>, !torch.int -> !torch.vtensor<[7,1600],f32>
    // %4782 is not referenced later; the reshape uses the list %4783.
    %4782 = torch.vtensor.literal(dense<[1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3928 = torch.constant.int 1
    %int7_3929 = torch.constant.int 7
    %int1600_3930 = torch.constant.int 1600
    %4783 = torch.prim.ListConstruct %int1_3928, %int7_3929, %int1600_3930 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4784 = torch.aten.reshape %4781, %4783 : !torch.vtensor<[7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    %int1_3931 = torch.constant.int 1
    // Skip connection: add the feed-forward output to %4754, the tensor that entered the layer norm above.
    %4785 = torch.aten.add.Tensor %4754, %4784, %int1_3931 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1600],f32>, !torch.int -> !torch.vtensor<[1,7,1600],f32>
    // ---- Final layer norm and 1600 -> 50257 matmul producing the first returned value ----
    // Layer norm of %4785 over the trailing axis of size 1600, weight %578, bias %579
    // (both defined outside this excerpt), eps = 9.9999997473787516E-6.
    // Only %result0_3934 is used; %result1_3935 and %result2_3936 are not referenced later.
    %float9.999990e-06_3932 = torch.constant.float 9.9999997473787516E-6
    %int1600_3933 = torch.constant.int 1600
    %4786 = torch.prim.ListConstruct %int1600_3933 : (!torch.int) -> !torch.list<int>
    %result0_3934, %result1_3935, %result2_3936 = torch.aten.native_layer_norm %4785, %4786, %578, %579, %float9.999990e-06_3932 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int>, !torch.vtensor<[1600],f32>, !torch.vtensor<[1600],f32>, !torch.float -> !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1,7,1],f32>, !torch.vtensor<[1,7,1],f32>
    // %4787 (holding [-1, 7, 1600]) is not referenced later; the reshape uses the static list [1, 7, 1600].
    %4787 = torch.vtensor.literal(dense<[-1, 7, 1600]> : tensor<3xsi64>) : !torch.vtensor<[3],si64>
    %int1_3937 = torch.constant.int 1
    %int7_3938 = torch.constant.int 7
    %int1600_3939 = torch.constant.int 1600
    %4788 = torch.prim.ListConstruct %int1_3937, %int7_3938, %int1600_3939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    // Operand and result types are both [1,7,1600], so this reshape does not change the shape.
    %4789 = torch.aten.reshape %result0_3934, %4788 : !torch.vtensor<[1,7,1600],f32>, !torch.list<int> -> !torch.vtensor<[1,7,1600],f32>
    // [1,7,1600] x [1600,50257] (%580, defined outside this excerpt) -> [1,7,50257].
    // %4790 is the first operand of the function's `return`.
    // NOTE(review): 50257 presumably is the vocabulary size, making %4790 the per-token logits — confirm.
    %4790 = torch.aten.matmul %4789, %580 : !torch.vtensor<[1,7,1600],f32>, !torch.vtensor<[1600,50257],f32> -> !torch.vtensor<[1,7,50257],f32>
    return %4790, %630, %634, %717, %721, %804, %808, %891, %895, %978, %982, %1065, %1069, %1152, %1156, %1239, %1243, %1326, %1330, %1413, %1417, %1500, %1504, %1587, %1591, %1674, %1678, %1761, %1765, %1848, %1852, %1935, %1939, %2022, %2026, %2109, %2113, %2196, %2200, %2283, %2287, %2370, %2374, %2457, %2461, %2544, %2548, %2631, %2635, %2718, %2722, %2805, %2809, %2892, %2896, %2979, %2983, %3066, %3070, %3153, %3157, %3240, %3244, %3327, %3331, %3414, %3418, %3501, %3505, %3588, %3592, %3675, %3679, %3762, %3766, %3849, %3853, %3936, %3940, %4023, %4027, %4110, %4114, %4197, %4201, %4284, %4288, %4371, %4375, %4458, %4462, %4545, %4549, %4632, %4636, %4719, %4723 : !torch.vtensor<[1,7,50257],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, 
!torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>, !torch.vtensor<[1,25,7,64],f32>
  }
 }