Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save AmosLewis/7da724d4fa77eae18d6de02ff82105b0 to your computer and use it in GitHub Desktop.
➜ ~ torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=torch.aten.flatten.using_ints,torch.aten.native_layer_norm,torch.aten.linear})' /tmp/_lambda.mlir --mlir-print-ir-after-failure -mlir-disable-threading
<eval_with_key>.2:5:16: error: unsupported by backend contract: tensor with unknown rank
<eval_with_key>.2:5:16: note: see current operation: %36 = "torch.tensor_static_info_cast"(%35) : (!torch.vtensor<[1,4],si64>) -> !torch.vtensor<*,si64>
<eval_with_key>.2:5:16: note: this is likely due to a missing transfer function in abstract_interp_lib_gen.py
// -----// IR Dump After LowerToBackendContract Failed (torch-lower-to-backend-contract) //----- //
module attributes {torch.debug_module_name = "_lambda"} {
func.func @forward(%arg0: !torch.vtensor<[1,15],si64>, %arg1: !torch.vtensor<[1,4],si64>) -> !torch.vtensor<[1,4,32128],f32> {
%int512 = torch.constant.int 512
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int0 = torch.constant.int 0
%int-1 = torch.constant.int -1
%true = torch.constant.bool true
%int4 = torch.constant.int 4
%false = torch.constant.bool false
%none = torch.constant.none
%int15 = torch.constant.int 15
%0 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.vtensor<[512,2048],f32>
%1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.vtensor<[2048,512],f32>
%2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<32x8xf32>) : !torch.vtensor<[32,8],f32>
%3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.vtensor<[512,512],f32>
%4 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.vtensor<[512],f32>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<32128x512xf32>) : !torch.vtensor<[32128,512],f32>
%6 = torch.vtensor.literal(dense<0> : tensor<si64>) : !torch.vtensor<[],si64>
%int-100 = torch.constant.int -100
%float-3.402820e38 = torch.constant.float -3.4028234663852886E+38
%int6 = torch.constant.int 6
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int2 = torch.constant.int 2
%float1.000000e00 = torch.constant.float 1.000000e+00
%float9.999990e-07 = torch.constant.float 9.9999999999999995E-7
%int8 = torch.constant.int 8
%int64 = torch.constant.int 64
%int16 = torch.constant.int 16
%float2.772590e00 = torch.constant.float 2.7725887222397811
%int2048 = torch.constant.int 2048
%float2.079440e00 = torch.constant.float 2.0794415416798357
%int31 = torch.constant.int 31
%float4.419420e-02 = torch.constant.float 0.044194173824159223
%int32128 = torch.constant.int 32128
%cpu = torch.constant.device "cpu"
%7 = torch.prim.ListConstruct %int1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%8 = torch.aten.zeros %7, %int4, %int0, %cpu, %false : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.vtensor<[1,4],si64>
%9 = torch.tensor_static_info_cast %8 : !torch.vtensor<[1,4],si64> to !torch.vtensor<*,si64>
%10 = torch.copy.to_tensor %9 : !torch.tensor<*,si64>
%11 = torch.aten.slice.Tensor %arg1, %int1, %int0, %int-1, %int1 : !torch.vtensor<[1,4],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,3],si64>
%12 = torch.aten.clone %11, %none : !torch.vtensor<[1,3],si64>, !torch.none -> !torch.vtensor<[1,3],si64>
%13 = torch.aten.slice.Tensor %10, %int1, %int1, %int9223372036854775807, %int1 : !torch.tensor<*,si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor<[1,3],si64>
%14 = torch.tensor_static_info_cast %13 : !torch.tensor<[1,3],si64> to !torch.tensor<*,si64>
%15 = torch.prim.ListConstruct %int1, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%16 = torch.aten.broadcast_to %12, %15 : !torch.vtensor<[1,3],si64>, !torch.list<int> -> !torch.vtensor<[1,3],si64>
%17 = torch.tensor_static_info_cast %16 : !torch.vtensor<[1,3],si64> to !torch.vtensor<*,si64>
torch.overwrite.tensor.contents %17 overwrites %14 : !torch.vtensor<*,si64>, !torch.tensor<*,si64>
%18 = torch.aten.clone %6, %none : !torch.vtensor<[],si64>, !torch.none -> !torch.vtensor<[],si64>
%19 = torch.aten.slice.Tensor %10, %int1, %int0, %int1, %int1 : !torch.tensor<*,si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor<[1,1],si64>
%20 = torch.aten.squeeze.dim %19, %int1 : !torch.tensor<[1,1],si64>, !torch.int -> !torch.tensor<[1],si64>
%21 = torch.tensor_static_info_cast %20 : !torch.tensor<[1],si64> to !torch.tensor<*,si64>
%22 = torch.copy.to_vtensor %21 : !torch.vtensor<*,si64>
%23 = torch.aten.fill.Tensor %22, %18 : !torch.vtensor<*,si64>, !torch.vtensor<[],si64> -> !torch.vtensor<[1],si64>
%24 = torch.tensor_static_info_cast %23 : !torch.vtensor<[1],si64> to !torch.vtensor<*,si64>
torch.overwrite.tensor.contents %24 overwrites %21 : !torch.vtensor<*,si64>, !torch.tensor<*,si64>
%25 = torch.copy.to_vtensor %10 : !torch.vtensor<*,si64>
%26 = torch.aten.eq.Scalar %25, %int-100 : !torch.vtensor<*,si64>, !torch.int -> !torch.vtensor<[1,4],i1>
%27 = torch.copy.to_vtensor %10 : !torch.vtensor<*,si64>
%28 = torch.prim.ListConstruct : () -> !torch.list<int>
%29 = torch.prim.NumToTensor.Scalar %int0 : !torch.int -> !torch.vtensor<[],si64>
%30 = torch.aten.broadcast_to %29, %28 : !torch.vtensor<[],si64>, !torch.list<int> -> !torch.vtensor<[],si64>
%31 = torch.aten.where.self %26, %30, %27 : !torch.vtensor<[1,4],i1>, !torch.vtensor<[],si64>, !torch.vtensor<*,si64> -> !torch.vtensor<[1,4],si64>
%32 = torch.tensor_static_info_cast %31 : !torch.vtensor<[1,4],si64> to !torch.vtensor<*,si64>
torch.overwrite.tensor.contents %32 overwrites %10 : !torch.vtensor<*,si64>, !torch.tensor<*,si64>
%33 = torch.prim.ListConstruct %int-1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%34 = torch.aten.view %arg0, %33 : !torch.vtensor<[1,15],si64>, !torch.list<int> -> !torch.vtensor<[1,15],si64>
%35 = torch.aten.embedding %5, %34, %int-1, %false, %false : !torch.vtensor<[32128,512],f32>, !torch.vtensor<[1,15],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,15,512],f32>
%36 = torch.prim.ListConstruct %int1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%37 = torch.aten.ones %36, %none, %none, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[1,15],f32>
%38 = torch.aten.unsqueeze %37, %int1 : !torch.vtensor<[1,15],f32>, !torch.int -> !torch.vtensor<[1,1,15],f32>
%39 = torch.aten.unsqueeze %38, %int2 : !torch.vtensor<[1,1,15],f32>, !torch.int -> !torch.vtensor<[1,1,1,15],f32>
%40 = torch.aten.rsub.Scalar %39, %float1.000000e00, %int1 : !torch.vtensor<[1,1,1,15],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,15],f32>
%41 = torch.aten.mul.Scalar %40, %float-3.402820e38 : !torch.vtensor<[1,1,1,15],f32>, !torch.float -> !torch.vtensor<[1,1,1,15],f32>
%42 = torch.aten.pow.Tensor_Scalar %35, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%43 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%44 = torch.aten.sum.dim_IntList %42, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%45 = torch.aten.div.Scalar %44, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%46 = torch.aten.add.Scalar %45, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%47 = torch.aten.rsqrt %46 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%48 = torch.aten.mul.Tensor %35, %47 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%49 = torch.aten.mul.Tensor %4, %48 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%50 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%51 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%52 = torch.aten.view %49, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%53 = torch.aten.mm %52, %50 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%54 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%55 = torch.aten.view %53, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%56 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%57 = torch.aten.view %55, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%58 = torch.aten.transpose.int %57, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%59 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%60 = torch.aten.view %49, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%61 = torch.aten.mm %60, %59 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%62 = torch.aten.view %61, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%63 = torch.aten.view %62, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%64 = torch.aten.transpose.int %63, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%65 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%66 = torch.aten.view %49, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%67 = torch.aten.mm %66, %65 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%68 = torch.aten.view %67, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%69 = torch.aten.view %68, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%70 = torch.aten.transpose.int %69, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%71 = torch.aten.transpose.int %64, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%72 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%73 = torch.aten.broadcast_to %58, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%74 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%75 = torch.aten.view %73, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%76 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%77 = torch.aten.broadcast_to %71, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%78 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%79 = torch.aten.view %77, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%80 = torch.aten.bmm %75, %79 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%81 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%82 = torch.aten.view %80, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%83 = torch.aten.arange.start_step %int0, %int15, %int1, %int4, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[15],si64>
%84 = torch.aten.unsqueeze %83, %int1 : !torch.vtensor<[15],si64>, !torch.int -> !torch.vtensor<[15,1],si64>
%85 = torch.aten.arange.start_step %int0, %int15, %int1, %int4, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[15],si64>
%86 = torch.aten.unsqueeze %85, %int0 : !torch.vtensor<[15],si64>, !torch.int -> !torch.vtensor<[1,15],si64>
%87 = torch.aten.sub.Tensor %86, %84, %int1 : !torch.vtensor<[1,15],si64>, !torch.vtensor<[15,1],si64>, !torch.int -> !torch.vtensor<[15,15],si64>
%88 = torch.aten.gt.Scalar %87, %int0 : !torch.vtensor<[15,15],si64>, !torch.int -> !torch.vtensor<[15,15],i1>
%89 = torch.aten.to.dtype %88, %int4, %false, %false, %none : !torch.vtensor<[15,15],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[15,15],si64>
%90 = torch.aten.mul.Scalar %89, %int16 : !torch.vtensor<[15,15],si64>, !torch.int -> !torch.vtensor<[15,15],si64>
%91 = torch.aten.add.Scalar %90, %int0, %int1 : !torch.vtensor<[15,15],si64>, !torch.int, !torch.int -> !torch.vtensor<[15,15],si64>
%92 = torch.aten.abs %87 : !torch.vtensor<[15,15],si64> -> !torch.vtensor<[15,15],si64>
%93 = torch.aten.lt.Scalar %92, %int8 : !torch.vtensor<[15,15],si64>, !torch.int -> !torch.vtensor<[15,15],i1>
%94 = torch.aten.to.dtype %92, %int6, %false, %false, %none : !torch.vtensor<[15,15],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[15,15],f32>
%95 = torch.aten.div.Scalar %94, %int8 : !torch.vtensor<[15,15],f32>, !torch.int -> !torch.vtensor<[15,15],f32>
%96 = torch.aten.log %95 : !torch.vtensor<[15,15],f32> -> !torch.vtensor<[15,15],f32>
%97 = torch.aten.div.Scalar %96, %float2.772590e00 : !torch.vtensor<[15,15],f32>, !torch.float -> !torch.vtensor<[15,15],f32>
%98 = torch.aten.mul.Scalar %97, %int8 : !torch.vtensor<[15,15],f32>, !torch.int -> !torch.vtensor<[15,15],f32>
%99 = torch.aten.to.dtype %98, %int4, %false, %false, %none : !torch.vtensor<[15,15],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[15,15],si64>
%100 = torch.aten.add.Scalar %99, %int8, %int1 : !torch.vtensor<[15,15],si64>, !torch.int, !torch.int -> !torch.vtensor<[15,15],si64>
%101 = torch.prim.NumToTensor.Scalar %int15 : !torch.int -> !torch.vtensor<[],si64>
%102 = torch.prim.ListConstruct %int15, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%103 = torch.aten.broadcast_to %101, %102 : !torch.vtensor<[],si64>, !torch.list<int> -> !torch.vtensor<[15,15],si64>
%104 = torch.aten.minimum %100, %103 : !torch.vtensor<[15,15],si64>, !torch.vtensor<[15,15],si64> -> !torch.vtensor<[15,15],si64>
%105 = torch.aten.where.self %93, %92, %104 : !torch.vtensor<[15,15],i1>, !torch.vtensor<[15,15],si64>, !torch.vtensor<[15,15],si64> -> !torch.vtensor<[15,15],si64>
%106 = torch.aten.add.Tensor %91, %105, %int1 : !torch.vtensor<[15,15],si64>, !torch.vtensor<[15,15],si64>, !torch.int -> !torch.vtensor<[15,15],si64>
%107 = torch.aten.embedding %2, %106, %int-1, %false, %false : !torch.vtensor<[32,8],f32>, !torch.vtensor<[15,15],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[15,15,8],f32>
%108 = torch.prim.ListConstruct %int2, %int0, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%109 = torch.aten.permute %107, %108 : !torch.vtensor<[15,15,8],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%110 = torch.aten.unsqueeze %109, %int0 : !torch.vtensor<[8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%111 = torch.aten.add.Tensor %110, %41, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,1,1,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%112 = torch.aten.add.Tensor %82, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%values, %indices = torch.aten.max.dim %112, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%113 = torch.aten.sub.Tensor %112, %values, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%114 = torch.aten.exp %113 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%115 = torch.aten.sum.dim_IntList %114, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%116 = torch.aten.div.Tensor %114, %115 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
%117 = torch.aten.broadcast_to %116, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%118 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%119 = torch.aten.view %117, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%120 = torch.aten.broadcast_to %70, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%121 = torch.aten.view %120, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%122 = torch.aten.bmm %119, %121 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%123 = torch.aten.view %122, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%124 = torch.aten.transpose.int %123, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%125 = torch.aten.clone %124, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%126 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%127 = torch.aten.view %125, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%128 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%129 = torch.aten.view %127, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%130 = torch.aten.mm %129, %128 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%131 = torch.aten.view %130, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%132 = torch.aten.add.Tensor %35, %131, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%133 = torch.aten.pow.Tensor_Scalar %132, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%134 = torch.aten.sum.dim_IntList %133, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%135 = torch.aten.div.Scalar %134, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%136 = torch.aten.add.Scalar %135, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%137 = torch.aten.rsqrt %136 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%138 = torch.aten.mul.Tensor %132, %137 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%139 = torch.aten.mul.Tensor %4, %138 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%140 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%141 = torch.aten.view %139, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%142 = torch.aten.mm %141, %140 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%143 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%144 = torch.aten.view %142, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%145 = torch.aten.relu %144 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%146 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%147 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%148 = torch.aten.view %145, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%149 = torch.aten.mm %148, %146 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%150 = torch.aten.view %149, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%151 = torch.aten.add.Tensor %132, %150, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%152 = torch.aten.pow.Tensor_Scalar %151, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%153 = torch.aten.sum.dim_IntList %152, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%154 = torch.aten.div.Scalar %153, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%155 = torch.aten.add.Scalar %154, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%156 = torch.aten.rsqrt %155 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%157 = torch.aten.mul.Tensor %151, %156 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%158 = torch.aten.mul.Tensor %4, %157 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%159 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%160 = torch.aten.view %158, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%161 = torch.aten.mm %160, %159 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%162 = torch.aten.view %161, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%163 = torch.aten.view %162, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%164 = torch.aten.transpose.int %163, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%165 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%166 = torch.aten.view %158, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%167 = torch.aten.mm %166, %165 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%168 = torch.aten.view %167, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%169 = torch.aten.view %168, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%170 = torch.aten.transpose.int %169, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%171 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%172 = torch.aten.view %158, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%173 = torch.aten.mm %172, %171 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%174 = torch.aten.view %173, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%175 = torch.aten.view %174, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%176 = torch.aten.transpose.int %175, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%177 = torch.aten.transpose.int %170, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%178 = torch.aten.broadcast_to %164, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%179 = torch.aten.view %178, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%180 = torch.aten.broadcast_to %177, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%181 = torch.aten.view %180, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%182 = torch.aten.bmm %179, %181 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%183 = torch.aten.view %182, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%184 = torch.aten.add.Tensor %183, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%values_0, %indices_1 = torch.aten.max.dim %184, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%185 = torch.aten.sub.Tensor %184, %values_0, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%186 = torch.aten.exp %185 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%187 = torch.aten.sum.dim_IntList %186, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%188 = torch.aten.div.Tensor %186, %187 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
%189 = torch.aten.broadcast_to %188, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%190 = torch.aten.view %189, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%191 = torch.aten.broadcast_to %176, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%192 = torch.aten.view %191, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%193 = torch.aten.bmm %190, %192 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%194 = torch.aten.view %193, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%195 = torch.aten.transpose.int %194, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%196 = torch.aten.clone %195, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%197 = torch.aten.view %196, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%198 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%199 = torch.aten.view %197, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%200 = torch.aten.mm %199, %198 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%201 = torch.aten.view %200, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%202 = torch.aten.add.Tensor %151, %201, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%203 = torch.aten.pow.Tensor_Scalar %202, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%204 = torch.aten.sum.dim_IntList %203, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%205 = torch.aten.div.Scalar %204, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%206 = torch.aten.add.Scalar %205, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%207 = torch.aten.rsqrt %206 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%208 = torch.aten.mul.Tensor %202, %207 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%209 = torch.aten.mul.Tensor %4, %208 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%210 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%211 = torch.aten.view %209, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%212 = torch.aten.mm %211, %210 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%213 = torch.aten.view %212, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%214 = torch.aten.relu %213 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%215 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%216 = torch.aten.view %214, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%217 = torch.aten.mm %216, %215 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%218 = torch.aten.view %217, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%219 = torch.aten.add.Tensor %202, %218, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%220 = torch.aten.pow.Tensor_Scalar %219, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%221 = torch.aten.sum.dim_IntList %220, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%222 = torch.aten.div.Scalar %221, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%223 = torch.aten.add.Scalar %222, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%224 = torch.aten.rsqrt %223 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%225 = torch.aten.mul.Tensor %219, %224 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%226 = torch.aten.mul.Tensor %4, %225 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
// --- Self-attention over the normalized activations %226 (seq len 15,
// model width 512, 8 heads x head_dim 64). ---
// Q projection: weight transpose, flatten to [15,512], matmul, then reshape
// to [1,15,8,64] and swap seq/head dims -> [1,8,15,64].
// NOTE(review): Q, K and V below all read the same elided constant %3; in
// the source model these are normally three distinct weights — identical
// __elided__ resources may have been merged when this dump was printed.
%227 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%228 = torch.aten.view %226, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%229 = torch.aten.mm %228, %227 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%230 = torch.aten.view %229, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%231 = torch.aten.view %230, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%232 = torch.aten.transpose.int %231, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// K projection (same shape pipeline as Q).
%233 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%234 = torch.aten.view %226, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%235 = torch.aten.mm %234, %233 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%236 = torch.aten.view %235, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%237 = torch.aten.view %236, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%238 = torch.aten.transpose.int %237, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// V projection (same shape pipeline as Q).
%239 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%240 = torch.aten.view %226, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%241 = torch.aten.mm %240, %239 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%242 = torch.aten.view %241, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%243 = torch.aten.view %242, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%244 = torch.aten.transpose.int %243, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// Attention scores: Q @ K^T as a batched matmul over the 8 heads.
// No 1/sqrt(head_dim) scaling op appears here (consistent with T5-style
// attention — confirm against the source model).
%245 = torch.aten.transpose.int %238, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%246 = torch.aten.broadcast_to %232, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%247 = torch.aten.view %246, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%248 = torch.aten.broadcast_to %245, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%249 = torch.aten.view %248, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%250 = torch.aten.bmm %247, %249 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%251 = torch.aten.view %250, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
// %111 is a precomputed [1,8,15,15] additive term defined earlier —
// presumably relative position bias and/or attention mask; verify above.
%252 = torch.aten.add.Tensor %251, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
// Numerically-stable softmax over the last dim, decomposed:
// subtract the per-row max, exp, normalize by the row sum.
%values_2, %indices_3 = torch.aten.max.dim %252, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%253 = torch.aten.sub.Tensor %252, %values_2, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%254 = torch.aten.exp %253 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%255 = torch.aten.sum.dim_IntList %254, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%256 = torch.aten.div.Tensor %254, %255 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
// Context: softmax(scores) @ V, again as a batched matmul over heads.
%257 = torch.aten.broadcast_to %256, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%258 = torch.aten.view %257, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%259 = torch.aten.broadcast_to %244, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%260 = torch.aten.view %259, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%261 = torch.aten.bmm %258, %260 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%262 = torch.aten.view %261, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
// Merge heads back to [1,15,512] (clone makes the transposed layout
// contiguous so the view is legal), then apply the output projection.
%263 = torch.aten.transpose.int %262, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%264 = torch.aten.clone %263, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%265 = torch.aten.view %264, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%266 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%267 = torch.aten.view %265, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%268 = torch.aten.mm %267, %266 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%269 = torch.aten.view %268, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// --- Residual add of the attention output, then RMS-style normalization
// (x * rsqrt(sum(x^2)/512 + ~1e-6) scaled by %4; no mean subtraction). ---
%270 = torch.aten.add.Tensor %219, %269, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%271 = torch.aten.pow.Tensor_Scalar %270, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%272 = torch.aten.sum.dim_IntList %271, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%273 = torch.aten.div.Scalar %272, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%274 = torch.aten.add.Scalar %273, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%275 = torch.aten.rsqrt %274 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%276 = torch.aten.mul.Tensor %270, %275 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%277 = torch.aten.mul.Tensor %4, %276 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
// --- Feed-forward sub-block: 512 -> 2048 (weight %1), ReLU,
// 2048 -> 512 (weight %0); no bias adds appear in this decomposition. ---
%278 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%279 = torch.aten.view %277, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%280 = torch.aten.mm %279, %278 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%281 = torch.aten.view %280, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%282 = torch.aten.relu %281 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%283 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%284 = torch.aten.view %282, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%285 = torch.aten.mm %284, %283 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%286 = torch.aten.view %285, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// --- Next encoder layer; same structure as the preceding ops:
// residual add -> RMS-style norm -> 8-head self-attention (Q/K/V matmuls,
// additive term %111, max/sub/exp/sum/div softmax, context, out-proj)
// -> residual add -> RMS-style norm -> ReLU FFN (512->2048->512). ---
// Residual + RMS norm:
%287 = torch.aten.add.Tensor %270, %286, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%288 = torch.aten.pow.Tensor_Scalar %287, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%289 = torch.aten.sum.dim_IntList %288, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%290 = torch.aten.div.Scalar %289, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%291 = torch.aten.add.Scalar %290, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%292 = torch.aten.rsqrt %291 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%293 = torch.aten.mul.Tensor %287, %292 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%294 = torch.aten.mul.Tensor %4, %293 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
// Self-attention (Q, K, V projections):
%295 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%296 = torch.aten.view %294, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%297 = torch.aten.mm %296, %295 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%298 = torch.aten.view %297, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%299 = torch.aten.view %298, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%300 = torch.aten.transpose.int %299, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%301 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%302 = torch.aten.view %294, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%303 = torch.aten.mm %302, %301 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%304 = torch.aten.view %303, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%305 = torch.aten.view %304, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%306 = torch.aten.transpose.int %305, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%307 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%308 = torch.aten.view %294, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%309 = torch.aten.mm %308, %307 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%310 = torch.aten.view %309, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%311 = torch.aten.view %310, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%312 = torch.aten.transpose.int %311, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// Scores, softmax, context, output projection:
%313 = torch.aten.transpose.int %306, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%314 = torch.aten.broadcast_to %300, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%315 = torch.aten.view %314, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%316 = torch.aten.broadcast_to %313, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%317 = torch.aten.view %316, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%318 = torch.aten.bmm %315, %317 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%319 = torch.aten.view %318, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%320 = torch.aten.add.Tensor %319, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%values_4, %indices_5 = torch.aten.max.dim %320, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%321 = torch.aten.sub.Tensor %320, %values_4, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%322 = torch.aten.exp %321 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%323 = torch.aten.sum.dim_IntList %322, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%324 = torch.aten.div.Tensor %322, %323 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
%325 = torch.aten.broadcast_to %324, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%326 = torch.aten.view %325, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%327 = torch.aten.broadcast_to %312, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%328 = torch.aten.view %327, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%329 = torch.aten.bmm %326, %328 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%330 = torch.aten.view %329, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%331 = torch.aten.transpose.int %330, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%332 = torch.aten.clone %331, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%333 = torch.aten.view %332, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%334 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%335 = torch.aten.view %333, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%336 = torch.aten.mm %335, %334 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%337 = torch.aten.view %336, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// Residual + RMS norm, then ReLU feed-forward:
%338 = torch.aten.add.Tensor %287, %337, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%339 = torch.aten.pow.Tensor_Scalar %338, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%340 = torch.aten.sum.dim_IntList %339, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%341 = torch.aten.div.Scalar %340, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%342 = torch.aten.add.Scalar %341, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%343 = torch.aten.rsqrt %342 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%344 = torch.aten.mul.Tensor %338, %343 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%345 = torch.aten.mul.Tensor %4, %344 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%346 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%347 = torch.aten.view %345, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%348 = torch.aten.mm %347, %346 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%349 = torch.aten.view %348, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%350 = torch.aten.relu %349 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%351 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%352 = torch.aten.view %350, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%353 = torch.aten.mm %352, %351 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%354 = torch.aten.view %353, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// --- Next encoder layer; identical op pattern: residual add -> RMS-style
// norm -> 8-head self-attention (with additive term %111 and decomposed
// softmax) -> residual add -> RMS-style norm -> ReLU FFN (512->2048->512). ---
// Residual + RMS norm:
%355 = torch.aten.add.Tensor %338, %354, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%356 = torch.aten.pow.Tensor_Scalar %355, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%357 = torch.aten.sum.dim_IntList %356, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%358 = torch.aten.div.Scalar %357, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%359 = torch.aten.add.Scalar %358, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%360 = torch.aten.rsqrt %359 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%361 = torch.aten.mul.Tensor %355, %360 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%362 = torch.aten.mul.Tensor %4, %361 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
// Self-attention (Q, K, V projections):
%363 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%364 = torch.aten.view %362, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%365 = torch.aten.mm %364, %363 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%366 = torch.aten.view %365, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%367 = torch.aten.view %366, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%368 = torch.aten.transpose.int %367, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%369 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%370 = torch.aten.view %362, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%371 = torch.aten.mm %370, %369 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%372 = torch.aten.view %371, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%373 = torch.aten.view %372, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%374 = torch.aten.transpose.int %373, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%375 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%376 = torch.aten.view %362, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%377 = torch.aten.mm %376, %375 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%378 = torch.aten.view %377, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%379 = torch.aten.view %378, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%380 = torch.aten.transpose.int %379, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// Scores, softmax, context, output projection:
%381 = torch.aten.transpose.int %374, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%382 = torch.aten.broadcast_to %368, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%383 = torch.aten.view %382, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%384 = torch.aten.broadcast_to %381, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%385 = torch.aten.view %384, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%386 = torch.aten.bmm %383, %385 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%387 = torch.aten.view %386, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%388 = torch.aten.add.Tensor %387, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%values_6, %indices_7 = torch.aten.max.dim %388, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%389 = torch.aten.sub.Tensor %388, %values_6, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%390 = torch.aten.exp %389 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%391 = torch.aten.sum.dim_IntList %390, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%392 = torch.aten.div.Tensor %390, %391 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
%393 = torch.aten.broadcast_to %392, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%394 = torch.aten.view %393, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%395 = torch.aten.broadcast_to %380, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%396 = torch.aten.view %395, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%397 = torch.aten.bmm %394, %396 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%398 = torch.aten.view %397, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%399 = torch.aten.transpose.int %398, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%400 = torch.aten.clone %399, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%401 = torch.aten.view %400, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%402 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%403 = torch.aten.view %401, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%404 = torch.aten.mm %403, %402 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%405 = torch.aten.view %404, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// Residual + RMS norm, then ReLU feed-forward:
%406 = torch.aten.add.Tensor %355, %405, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%407 = torch.aten.pow.Tensor_Scalar %406, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%408 = torch.aten.sum.dim_IntList %407, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%409 = torch.aten.div.Scalar %408, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%410 = torch.aten.add.Scalar %409, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%411 = torch.aten.rsqrt %410 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%412 = torch.aten.mul.Tensor %406, %411 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%413 = torch.aten.mul.Tensor %4, %412 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%414 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%415 = torch.aten.view %413, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%416 = torch.aten.mm %415, %414 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%417 = torch.aten.view %416, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%418 = torch.aten.relu %417 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%419 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%420 = torch.aten.view %418, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%421 = torch.aten.mm %420, %419 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%422 = torch.aten.view %421, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// --- Next encoder layer; identical op pattern (residual add, RMS-style
// norm, 8-head self-attention, residual add, RMS-style norm, ReLU FFN).
// The dump continues past this chunk; the final lines here start the
// following layer's normalization. ---
// Residual + RMS norm:
%423 = torch.aten.add.Tensor %406, %422, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%424 = torch.aten.pow.Tensor_Scalar %423, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%425 = torch.aten.sum.dim_IntList %424, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%426 = torch.aten.div.Scalar %425, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%427 = torch.aten.add.Scalar %426, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%428 = torch.aten.rsqrt %427 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%429 = torch.aten.mul.Tensor %423, %428 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%430 = torch.aten.mul.Tensor %4, %429 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
// Self-attention (Q, K, V projections):
%431 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%432 = torch.aten.view %430, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%433 = torch.aten.mm %432, %431 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%434 = torch.aten.view %433, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%435 = torch.aten.view %434, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%436 = torch.aten.transpose.int %435, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%437 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%438 = torch.aten.view %430, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%439 = torch.aten.mm %438, %437 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%440 = torch.aten.view %439, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%441 = torch.aten.view %440, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%442 = torch.aten.transpose.int %441, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%443 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%444 = torch.aten.view %430, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%445 = torch.aten.mm %444, %443 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%446 = torch.aten.view %445, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%447 = torch.aten.view %446, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%448 = torch.aten.transpose.int %447, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
// Scores, softmax, context, output projection:
%449 = torch.aten.transpose.int %442, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%450 = torch.aten.broadcast_to %436, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%451 = torch.aten.view %450, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%452 = torch.aten.broadcast_to %449, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%453 = torch.aten.view %452, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%454 = torch.aten.bmm %451, %453 : !torch.vtensor<[8,15,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,15,15],f32>
%455 = torch.aten.view %454, %81 : !torch.vtensor<[8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%456 = torch.aten.add.Tensor %455, %111, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,15],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%values_8, %indices_9 = torch.aten.max.dim %456, %int-1, %true : !torch.vtensor<[1,8,15,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,15,1],f32>, !torch.vtensor<[1,8,15,1],si64>
%457 = torch.aten.sub.Tensor %456, %values_8, %int1 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32>, !torch.int -> !torch.vtensor<[1,8,15,15],f32>
%458 = torch.aten.exp %457 : !torch.vtensor<[1,8,15,15],f32> -> !torch.vtensor<[1,8,15,15],f32>
%459 = torch.aten.sum.dim_IntList %458, %43, %true, %none : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,15,1],f32>
%460 = torch.aten.div.Tensor %458, %459 : !torch.vtensor<[1,8,15,15],f32>, !torch.vtensor<[1,8,15,1],f32> -> !torch.vtensor<[1,8,15,15],f32>
%461 = torch.aten.broadcast_to %460, %81 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,15],f32>
%462 = torch.aten.view %461, %118 : !torch.vtensor<[1,8,15,15],f32>, !torch.list<int> -> !torch.vtensor<[8,15,15],f32>
%463 = torch.aten.broadcast_to %448, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%464 = torch.aten.view %463, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%465 = torch.aten.bmm %462, %464 : !torch.vtensor<[8,15,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,15,64],f32>
%466 = torch.aten.view %465, %72 : !torch.vtensor<[8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%467 = torch.aten.transpose.int %466, %int1, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%468 = torch.aten.clone %467, %int0 : !torch.vtensor<[1,15,8,64],f32>, !torch.int -> !torch.vtensor<[1,15,8,64],f32>
%469 = torch.aten.view %468, %126 : !torch.vtensor<[1,15,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%470 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%471 = torch.aten.view %469, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%472 = torch.aten.mm %471, %470 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%473 = torch.aten.view %472, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
// Residual + RMS norm, then ReLU feed-forward:
%474 = torch.aten.add.Tensor %423, %473, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%475 = torch.aten.pow.Tensor_Scalar %474, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%476 = torch.aten.sum.dim_IntList %475, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%477 = torch.aten.div.Scalar %476, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%478 = torch.aten.add.Scalar %477, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%479 = torch.aten.rsqrt %478 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%480 = torch.aten.mul.Tensor %474, %479 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%481 = torch.aten.mul.Tensor %4, %480 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%482 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%483 = torch.aten.view %481, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%484 = torch.aten.mm %483, %482 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[15,2048],f32>
%485 = torch.aten.view %484, %143 : !torch.vtensor<[15,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,15,2048],f32>
%486 = torch.aten.relu %485 : !torch.vtensor<[1,15,2048],f32> -> !torch.vtensor<[1,15,2048],f32>
%487 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%488 = torch.aten.view %486, %147 : !torch.vtensor<[1,15,2048],f32>, !torch.list<int> -> !torch.vtensor<[15,2048],f32>
%489 = torch.aten.mm %488, %487 : !torch.vtensor<[15,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[15,512],f32>
%490 = torch.aten.view %489, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%491 = torch.aten.add.Tensor %474, %490, %int1 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
// (Start of the next layer's normalization; continues past this chunk.)
%492 = torch.aten.pow.Tensor_Scalar %491, %int2 : !torch.vtensor<[1,15,512],f32>, !torch.int -> !torch.vtensor<[1,15,512],f32>
%493 = torch.aten.sum.dim_IntList %492, %43, %true, %none : !torch.vtensor<[1,15,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,15,1],f32>
%494 = torch.aten.div.Scalar %493, %int512 : !torch.vtensor<[1,15,1],f32>, !torch.int -> !torch.vtensor<[1,15,1],f32>
%495 = torch.aten.add.Scalar %494, %float9.999990e-07, %int1 : !torch.vtensor<[1,15,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,15,1],f32>
%496 = torch.aten.rsqrt %495 : !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,1],f32>
%497 = torch.aten.mul.Tensor %491, %496 : !torch.vtensor<[1,15,512],f32>, !torch.vtensor<[1,15,1],f32> -> !torch.vtensor<[1,15,512],f32>
%498 = torch.aten.mul.Tensor %4, %497 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,15,512],f32> -> !torch.vtensor<[1,15,512],f32>
%499 = torch.prim.ListConstruct %int-1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%500 = torch.aten.view %10, %499 : !torch.tensor<*,si64>, !torch.list<int> -> !torch.tensor<[1,4],si64>
%501 = torch.tensor_static_info_cast %500 : !torch.tensor<[1,4],si64> to !torch.tensor<*,si64>
%502 = torch.copy.to_vtensor %501 : !torch.vtensor<*,si64>
%503 = torch.aten.embedding %5, %502, %int-1, %false, %false : !torch.vtensor<[32128,512],f32>, !torch.vtensor<*,si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,4,512],f32>
%504 = torch.aten.ones %7, %none, %none, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[1,4],f32>
%505 = torch.aten.ones %36, %int4, %none, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[1,15],si64>
%506 = torch.aten.arange.start_step %int0, %int4, %int1, %none, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[4],si64>
%507 = torch.aten.unsqueeze %506, %int0 : !torch.vtensor<[4],si64>, !torch.int -> !torch.vtensor<[1,4],si64>
%508 = torch.aten.unsqueeze %507, %int1 : !torch.vtensor<[1,4],si64>, !torch.int -> !torch.vtensor<[1,1,4],si64>
%509 = torch.prim.ListConstruct %int1, %int1, %int1, %int1, %int1, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%510 = torch.prim.ListConstruct %int1, %int1, %int4, %int1, %int1, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%511 = torch.prim.ListConstruct %int1, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%512 = torch.aten.view %508, %509 : !torch.vtensor<[1,1,4],si64>, !torch.list<int> -> !torch.vtensor<[1,1,1,1,1,4],si64>
%513 = torch.aten.broadcast_to %512, %510 : !torch.vtensor<[1,1,1,1,1,4],si64>, !torch.list<int> -> !torch.vtensor<[1,1,4,1,1,4],si64>
%514 = torch.aten.view %513, %511 : !torch.vtensor<[1,1,4,1,1,4],si64>, !torch.list<int> -> !torch.vtensor<[1,4,4],si64>
%515 = torch.aten.unsqueeze %506, %int0 : !torch.vtensor<[4],si64>, !torch.int -> !torch.vtensor<[1,4],si64>
%516 = torch.aten.unsqueeze %515, %int2 : !torch.vtensor<[1,4],si64>, !torch.int -> !torch.vtensor<[1,4,1],si64>
%517 = torch.aten.le.Tensor %514, %516 : !torch.vtensor<[1,4,4],si64>, !torch.vtensor<[1,4,1],si64> -> !torch.vtensor<[1,4,4],i1>
%518 = torch.aten.to.dtype %517, %int6, %false, %false, %none : !torch.vtensor<[1,4,4],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,4,4],f32>
%519 = torch.aten.unsqueeze %518, %int1 : !torch.vtensor<[1,4,4],f32>, !torch.int -> !torch.vtensor<[1,1,4,4],f32>
%520 = torch.aten.unsqueeze %504, %int1 : !torch.vtensor<[1,4],f32>, !torch.int -> !torch.vtensor<[1,1,4],f32>
%521 = torch.aten.unsqueeze %520, %int2 : !torch.vtensor<[1,1,4],f32>, !torch.int -> !torch.vtensor<[1,1,1,4],f32>
%522 = torch.aten.mul.Tensor %519, %521 : !torch.vtensor<[1,1,4,4],f32>, !torch.vtensor<[1,1,1,4],f32> -> !torch.vtensor<[1,1,4,4],f32>
%523 = torch.aten.rsub.Scalar %522, %float1.000000e00, %int1 : !torch.vtensor<[1,1,4,4],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,4,4],f32>
%524 = torch.aten.mul.Scalar %523, %float-3.402820e38 : !torch.vtensor<[1,1,4,4],f32>, !torch.float -> !torch.vtensor<[1,1,4,4],f32>
%525 = torch.aten.unsqueeze %505, %int1 : !torch.vtensor<[1,15],si64>, !torch.int -> !torch.vtensor<[1,1,15],si64>
%526 = torch.aten.unsqueeze %525, %int2 : !torch.vtensor<[1,1,15],si64>, !torch.int -> !torch.vtensor<[1,1,1,15],si64>
%527 = torch.aten.to.dtype %526, %int6, %false, %false, %none : !torch.vtensor<[1,1,1,15],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,1,1,15],f32>
%528 = torch.aten.rsub.Scalar %527, %float1.000000e00, %int1 : !torch.vtensor<[1,1,1,15],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,15],f32>
%529 = torch.aten.mul.Scalar %528, %float-3.402820e38 : !torch.vtensor<[1,1,1,15],f32>, !torch.float -> !torch.vtensor<[1,1,1,15],f32>
%530 = torch.aten.pow.Tensor_Scalar %503, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%531 = torch.aten.sum.dim_IntList %530, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%532 = torch.aten.div.Scalar %531, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%533 = torch.aten.add.Scalar %532, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%534 = torch.aten.rsqrt %533 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%535 = torch.aten.mul.Tensor %503, %534 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%536 = torch.aten.mul.Tensor %4, %535 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%537 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%538 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%539 = torch.aten.view %536, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%540 = torch.aten.mm %539, %537 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%541 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%542 = torch.aten.view %540, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%543 = torch.aten.view %542, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%544 = torch.aten.transpose.int %543, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%545 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%546 = torch.aten.view %536, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%547 = torch.aten.mm %546, %545 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%548 = torch.aten.view %547, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%549 = torch.aten.view %548, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%550 = torch.aten.transpose.int %549, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%551 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%552 = torch.aten.view %536, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%553 = torch.aten.mm %552, %551 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%554 = torch.aten.view %553, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%555 = torch.aten.view %554, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%556 = torch.aten.transpose.int %555, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%557 = torch.aten.transpose.int %550, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%558 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%559 = torch.aten.broadcast_to %544, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%560 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%561 = torch.aten.view %559, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%562 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%563 = torch.aten.broadcast_to %557, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%564 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%565 = torch.aten.view %563, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%566 = torch.aten.bmm %561, %565 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%567 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%568 = torch.aten.view %566, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%569 = torch.aten.arange.start_step %int0, %int4, %int1, %int4, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[4],si64>
%570 = torch.aten.unsqueeze %569, %int1 : !torch.vtensor<[4],si64>, !torch.int -> !torch.vtensor<[4,1],si64>
%571 = torch.aten.arange.start_step %int0, %int4, %int1, %int4, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[4],si64>
%572 = torch.aten.unsqueeze %571, %int0 : !torch.vtensor<[4],si64>, !torch.int -> !torch.vtensor<[1,4],si64>
%573 = torch.aten.sub.Tensor %572, %570, %int1 : !torch.vtensor<[1,4],si64>, !torch.vtensor<[4,1],si64>, !torch.int -> !torch.vtensor<[4,4],si64>
%574 = torch.prim.NumToTensor.Scalar %int0 : !torch.int -> !torch.vtensor<[],si64>
%575 = torch.prim.ListConstruct %int4, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%576 = torch.aten.broadcast_to %574, %575 : !torch.vtensor<[],si64>, !torch.list<int> -> !torch.vtensor<[4,4],si64>
%577 = torch.aten.minimum %573, %576 : !torch.vtensor<[4,4],si64>, !torch.vtensor<[4,4],si64> -> !torch.vtensor<[4,4],si64>
%578 = torch.aten.neg %577 : !torch.vtensor<[4,4],si64> -> !torch.vtensor<[4,4],si64>
%579 = torch.aten.lt.Scalar %578, %int16 : !torch.vtensor<[4,4],si64>, !torch.int -> !torch.vtensor<[4,4],i1>
%580 = torch.aten.to.dtype %578, %int6, %false, %false, %none : !torch.vtensor<[4,4],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,4],f32>
%581 = torch.aten.div.Scalar %580, %int16 : !torch.vtensor<[4,4],f32>, !torch.int -> !torch.vtensor<[4,4],f32>
%582 = torch.aten.log %581 : !torch.vtensor<[4,4],f32> -> !torch.vtensor<[4,4],f32>
%583 = torch.aten.div.Scalar %582, %float2.079440e00 : !torch.vtensor<[4,4],f32>, !torch.float -> !torch.vtensor<[4,4],f32>
%584 = torch.aten.mul.Scalar %583, %int16 : !torch.vtensor<[4,4],f32>, !torch.int -> !torch.vtensor<[4,4],f32>
%585 = torch.aten.to.dtype %584, %int4, %false, %false, %none : !torch.vtensor<[4,4],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,4],si64>
%586 = torch.aten.add.Scalar %585, %int16, %int1 : !torch.vtensor<[4,4],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,4],si64>
%587 = torch.prim.NumToTensor.Scalar %int31 : !torch.int -> !torch.vtensor<[],si64>
%588 = torch.aten.broadcast_to %587, %575 : !torch.vtensor<[],si64>, !torch.list<int> -> !torch.vtensor<[4,4],si64>
%589 = torch.aten.minimum %586, %588 : !torch.vtensor<[4,4],si64>, !torch.vtensor<[4,4],si64> -> !torch.vtensor<[4,4],si64>
%590 = torch.aten.where.self %579, %578, %589 : !torch.vtensor<[4,4],i1>, !torch.vtensor<[4,4],si64>, !torch.vtensor<[4,4],si64> -> !torch.vtensor<[4,4],si64>
%591 = torch.aten.add.Scalar %590, %int0, %int1 : !torch.vtensor<[4,4],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,4],si64>
%592 = torch.aten.embedding %2, %591, %int-1, %false, %false : !torch.vtensor<[32,8],f32>, !torch.vtensor<[4,4],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,4,8],f32>
%593 = torch.aten.permute %592, %108 : !torch.vtensor<[4,4,8],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%594 = torch.aten.unsqueeze %593, %int0 : !torch.vtensor<[8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%595 = torch.aten.add.Tensor %594, %524, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,1,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%596 = torch.aten.add.Tensor %568, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_10, %indices_11 = torch.aten.max.dim %596, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%597 = torch.aten.sub.Tensor %596, %values_10, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%598 = torch.aten.exp %597 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%599 = torch.aten.sum.dim_IntList %598, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%600 = torch.aten.div.Tensor %598, %599 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%601 = torch.aten.broadcast_to %600, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%602 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%603 = torch.aten.view %601, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%604 = torch.aten.broadcast_to %556, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%605 = torch.aten.view %604, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%606 = torch.aten.bmm %603, %605 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%607 = torch.aten.view %606, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%608 = torch.aten.transpose.int %607, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%609 = torch.aten.clone %608, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%610 = torch.aten.view %609, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%611 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%612 = torch.aten.view %610, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%613 = torch.aten.mm %612, %611 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%614 = torch.aten.view %613, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%615 = torch.aten.add.Tensor %503, %614, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%616 = torch.aten.pow.Tensor_Scalar %615, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%617 = torch.aten.sum.dim_IntList %616, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%618 = torch.aten.div.Scalar %617, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%619 = torch.aten.add.Scalar %618, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%620 = torch.aten.rsqrt %619 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%621 = torch.aten.mul.Tensor %615, %620 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%622 = torch.aten.mul.Tensor %4, %621 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%623 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%624 = torch.aten.view %622, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%625 = torch.aten.mm %624, %623 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%626 = torch.aten.view %625, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%627 = torch.aten.view %626, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%628 = torch.aten.transpose.int %627, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%629 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%630 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%631 = torch.aten.mm %630, %629 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%632 = torch.aten.view %631, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%633 = torch.aten.view %632, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%634 = torch.aten.transpose.int %633, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%635 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%636 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%637 = torch.aten.mm %636, %635 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%638 = torch.aten.view %637, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%639 = torch.aten.view %638, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%640 = torch.aten.transpose.int %639, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%641 = torch.aten.transpose.int %634, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%642 = torch.aten.broadcast_to %628, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%643 = torch.aten.view %642, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%644 = torch.aten.broadcast_to %641, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%645 = torch.aten.view %644, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%646 = torch.aten.bmm %643, %645 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%647 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%648 = torch.aten.view %646, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%649 = torch.aten.zeros %647, %int6, %none, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[1,8,4,15],f32>
%650 = torch.aten.add.Tensor %649, %529, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,1,1,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%651 = torch.aten.add.Tensor %648, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_12, %indices_13 = torch.aten.max.dim %651, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%652 = torch.aten.sub.Tensor %651, %values_12, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%653 = torch.aten.exp %652 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%654 = torch.aten.sum.dim_IntList %653, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%655 = torch.aten.div.Tensor %653, %654 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%656 = torch.aten.broadcast_to %655, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%657 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%658 = torch.aten.view %656, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%659 = torch.aten.broadcast_to %640, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%660 = torch.aten.view %659, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%661 = torch.aten.bmm %658, %660 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%662 = torch.aten.view %661, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%663 = torch.aten.transpose.int %662, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%664 = torch.aten.clone %663, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%665 = torch.aten.view %664, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%666 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%667 = torch.aten.view %665, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%668 = torch.aten.mm %667, %666 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%669 = torch.aten.view %668, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%670 = torch.aten.add.Tensor %615, %669, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%671 = torch.aten.pow.Tensor_Scalar %670, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%672 = torch.aten.sum.dim_IntList %671, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%673 = torch.aten.div.Scalar %672, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%674 = torch.aten.add.Scalar %673, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%675 = torch.aten.rsqrt %674 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%676 = torch.aten.mul.Tensor %670, %675 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%677 = torch.aten.mul.Tensor %4, %676 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%678 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%679 = torch.aten.view %677, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%680 = torch.aten.mm %679, %678 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%681 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%682 = torch.aten.view %680, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%683 = torch.aten.relu %682 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%684 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%685 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%686 = torch.aten.view %683, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%687 = torch.aten.mm %686, %684 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%688 = torch.aten.view %687, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%689 = torch.aten.add.Tensor %670, %688, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%690 = torch.aten.pow.Tensor_Scalar %689, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%691 = torch.aten.sum.dim_IntList %690, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%692 = torch.aten.div.Scalar %691, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%693 = torch.aten.add.Scalar %692, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%694 = torch.aten.rsqrt %693 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%695 = torch.aten.mul.Tensor %689, %694 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%696 = torch.aten.mul.Tensor %4, %695 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%697 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%698 = torch.aten.view %696, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%699 = torch.aten.mm %698, %697 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%700 = torch.aten.view %699, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%701 = torch.aten.view %700, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%702 = torch.aten.transpose.int %701, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%703 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%704 = torch.aten.view %696, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%705 = torch.aten.mm %704, %703 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%706 = torch.aten.view %705, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%707 = torch.aten.view %706, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%708 = torch.aten.transpose.int %707, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%709 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%710 = torch.aten.view %696, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%711 = torch.aten.mm %710, %709 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%712 = torch.aten.view %711, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%713 = torch.aten.view %712, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%714 = torch.aten.transpose.int %713, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%715 = torch.aten.transpose.int %708, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%716 = torch.aten.broadcast_to %702, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%717 = torch.aten.view %716, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%718 = torch.aten.broadcast_to %715, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%719 = torch.aten.view %718, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%720 = torch.aten.bmm %717, %719 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%721 = torch.aten.view %720, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%722 = torch.aten.add.Tensor %721, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_14, %indices_15 = torch.aten.max.dim %722, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%723 = torch.aten.sub.Tensor %722, %values_14, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%724 = torch.aten.exp %723 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%725 = torch.aten.sum.dim_IntList %724, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%726 = torch.aten.div.Tensor %724, %725 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%727 = torch.aten.broadcast_to %726, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%728 = torch.aten.view %727, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%729 = torch.aten.broadcast_to %714, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%730 = torch.aten.view %729, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%731 = torch.aten.bmm %728, %730 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%732 = torch.aten.view %731, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%733 = torch.aten.transpose.int %732, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%734 = torch.aten.clone %733, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%735 = torch.aten.view %734, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%736 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%737 = torch.aten.view %735, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%738 = torch.aten.mm %737, %736 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%739 = torch.aten.view %738, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%740 = torch.aten.add.Tensor %689, %739, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%741 = torch.aten.pow.Tensor_Scalar %740, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%742 = torch.aten.sum.dim_IntList %741, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%743 = torch.aten.div.Scalar %742, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%744 = torch.aten.add.Scalar %743, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%745 = torch.aten.rsqrt %744 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%746 = torch.aten.mul.Tensor %740, %745 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%747 = torch.aten.mul.Tensor %4, %746 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%748 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%749 = torch.aten.view %747, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%750 = torch.aten.mm %749, %748 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%751 = torch.aten.view %750, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%752 = torch.aten.view %751, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%753 = torch.aten.transpose.int %752, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%754 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%755 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%756 = torch.aten.mm %755, %754 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%757 = torch.aten.view %756, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%758 = torch.aten.view %757, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%759 = torch.aten.transpose.int %758, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%760 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%761 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%762 = torch.aten.mm %761, %760 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%763 = torch.aten.view %762, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%764 = torch.aten.view %763, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%765 = torch.aten.transpose.int %764, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%766 = torch.aten.transpose.int %759, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%767 = torch.aten.broadcast_to %753, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%768 = torch.aten.view %767, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%769 = torch.aten.broadcast_to %766, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%770 = torch.aten.view %769, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%771 = torch.aten.bmm %768, %770 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%772 = torch.aten.view %771, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%773 = torch.aten.add.Tensor %772, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_16, %indices_17 = torch.aten.max.dim %773, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%774 = torch.aten.sub.Tensor %773, %values_16, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%775 = torch.aten.exp %774 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%776 = torch.aten.sum.dim_IntList %775, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%777 = torch.aten.div.Tensor %775, %776 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%778 = torch.aten.broadcast_to %777, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%779 = torch.aten.view %778, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%780 = torch.aten.broadcast_to %765, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%781 = torch.aten.view %780, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%782 = torch.aten.bmm %779, %781 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%783 = torch.aten.view %782, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%784 = torch.aten.transpose.int %783, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%785 = torch.aten.clone %784, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%786 = torch.aten.view %785, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%787 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%788 = torch.aten.view %786, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%789 = torch.aten.mm %788, %787 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%790 = torch.aten.view %789, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%791 = torch.aten.add.Tensor %740, %790, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%792 = torch.aten.pow.Tensor_Scalar %791, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%793 = torch.aten.sum.dim_IntList %792, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%794 = torch.aten.div.Scalar %793, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%795 = torch.aten.add.Scalar %794, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%796 = torch.aten.rsqrt %795 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%797 = torch.aten.mul.Tensor %791, %796 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%798 = torch.aten.mul.Tensor %4, %797 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%799 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%800 = torch.aten.view %798, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%801 = torch.aten.mm %800, %799 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%802 = torch.aten.view %801, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%803 = torch.aten.relu %802 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%804 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%805 = torch.aten.view %803, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%806 = torch.aten.mm %805, %804 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%807 = torch.aten.view %806, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%808 = torch.aten.add.Tensor %791, %807, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%809 = torch.aten.pow.Tensor_Scalar %808, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%810 = torch.aten.sum.dim_IntList %809, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%811 = torch.aten.div.Scalar %810, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%812 = torch.aten.add.Scalar %811, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%813 = torch.aten.rsqrt %812 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%814 = torch.aten.mul.Tensor %808, %813 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%815 = torch.aten.mul.Tensor %4, %814 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%816 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%817 = torch.aten.view %815, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%818 = torch.aten.mm %817, %816 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%819 = torch.aten.view %818, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%820 = torch.aten.view %819, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%821 = torch.aten.transpose.int %820, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%822 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%823 = torch.aten.view %815, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%824 = torch.aten.mm %823, %822 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%825 = torch.aten.view %824, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%826 = torch.aten.view %825, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%827 = torch.aten.transpose.int %826, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%828 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%829 = torch.aten.view %815, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%830 = torch.aten.mm %829, %828 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%831 = torch.aten.view %830, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%832 = torch.aten.view %831, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%833 = torch.aten.transpose.int %832, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%834 = torch.aten.transpose.int %827, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%835 = torch.aten.broadcast_to %821, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%836 = torch.aten.view %835, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%837 = torch.aten.broadcast_to %834, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%838 = torch.aten.view %837, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%839 = torch.aten.bmm %836, %838 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%840 = torch.aten.view %839, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%841 = torch.aten.add.Tensor %840, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_18, %indices_19 = torch.aten.max.dim %841, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%842 = torch.aten.sub.Tensor %841, %values_18, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%843 = torch.aten.exp %842 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%844 = torch.aten.sum.dim_IntList %843, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%845 = torch.aten.div.Tensor %843, %844 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%846 = torch.aten.broadcast_to %845, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%847 = torch.aten.view %846, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%848 = torch.aten.broadcast_to %833, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%849 = torch.aten.view %848, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%850 = torch.aten.bmm %847, %849 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%851 = torch.aten.view %850, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%852 = torch.aten.transpose.int %851, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%853 = torch.aten.clone %852, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%854 = torch.aten.view %853, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%855 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%856 = torch.aten.view %854, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%857 = torch.aten.mm %856, %855 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%858 = torch.aten.view %857, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%859 = torch.aten.add.Tensor %808, %858, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%860 = torch.aten.pow.Tensor_Scalar %859, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%861 = torch.aten.sum.dim_IntList %860, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%862 = torch.aten.div.Scalar %861, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%863 = torch.aten.add.Scalar %862, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%864 = torch.aten.rsqrt %863 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%865 = torch.aten.mul.Tensor %859, %864 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%866 = torch.aten.mul.Tensor %4, %865 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%867 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%868 = torch.aten.view %866, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%869 = torch.aten.mm %868, %867 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%870 = torch.aten.view %869, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%871 = torch.aten.view %870, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%872 = torch.aten.transpose.int %871, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%873 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%874 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%875 = torch.aten.mm %874, %873 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%876 = torch.aten.view %875, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%877 = torch.aten.view %876, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%878 = torch.aten.transpose.int %877, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%879 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%880 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%881 = torch.aten.mm %880, %879 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%882 = torch.aten.view %881, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%883 = torch.aten.view %882, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%884 = torch.aten.transpose.int %883, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%885 = torch.aten.transpose.int %878, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%886 = torch.aten.broadcast_to %872, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%887 = torch.aten.view %886, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%888 = torch.aten.broadcast_to %885, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%889 = torch.aten.view %888, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%890 = torch.aten.bmm %887, %889 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%891 = torch.aten.view %890, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%892 = torch.aten.add.Tensor %891, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_20, %indices_21 = torch.aten.max.dim %892, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%893 = torch.aten.sub.Tensor %892, %values_20, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%894 = torch.aten.exp %893 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%895 = torch.aten.sum.dim_IntList %894, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%896 = torch.aten.div.Tensor %894, %895 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%897 = torch.aten.broadcast_to %896, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%898 = torch.aten.view %897, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%899 = torch.aten.broadcast_to %884, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%900 = torch.aten.view %899, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%901 = torch.aten.bmm %898, %900 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%902 = torch.aten.view %901, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%903 = torch.aten.transpose.int %902, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%904 = torch.aten.clone %903, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%905 = torch.aten.view %904, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%906 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%907 = torch.aten.view %905, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%908 = torch.aten.mm %907, %906 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%909 = torch.aten.view %908, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%910 = torch.aten.add.Tensor %859, %909, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%911 = torch.aten.pow.Tensor_Scalar %910, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%912 = torch.aten.sum.dim_IntList %911, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%913 = torch.aten.div.Scalar %912, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%914 = torch.aten.add.Scalar %913, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%915 = torch.aten.rsqrt %914 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%916 = torch.aten.mul.Tensor %910, %915 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%917 = torch.aten.mul.Tensor %4, %916 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%918 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%919 = torch.aten.view %917, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%920 = torch.aten.mm %919, %918 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%921 = torch.aten.view %920, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%922 = torch.aten.relu %921 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%923 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%924 = torch.aten.view %922, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%925 = torch.aten.mm %924, %923 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%926 = torch.aten.view %925, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%927 = torch.aten.add.Tensor %910, %926, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%928 = torch.aten.pow.Tensor_Scalar %927, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%929 = torch.aten.sum.dim_IntList %928, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%930 = torch.aten.div.Scalar %929, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%931 = torch.aten.add.Scalar %930, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%932 = torch.aten.rsqrt %931 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%933 = torch.aten.mul.Tensor %927, %932 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%934 = torch.aten.mul.Tensor %4, %933 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%935 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%936 = torch.aten.view %934, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%937 = torch.aten.mm %936, %935 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%938 = torch.aten.view %937, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%939 = torch.aten.view %938, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%940 = torch.aten.transpose.int %939, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%941 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%942 = torch.aten.view %934, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%943 = torch.aten.mm %942, %941 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%944 = torch.aten.view %943, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%945 = torch.aten.view %944, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%946 = torch.aten.transpose.int %945, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%947 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%948 = torch.aten.view %934, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%949 = torch.aten.mm %948, %947 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%950 = torch.aten.view %949, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%951 = torch.aten.view %950, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%952 = torch.aten.transpose.int %951, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%953 = torch.aten.transpose.int %946, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%954 = torch.aten.broadcast_to %940, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%955 = torch.aten.view %954, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%956 = torch.aten.broadcast_to %953, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%957 = torch.aten.view %956, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%958 = torch.aten.bmm %955, %957 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%959 = torch.aten.view %958, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%960 = torch.aten.add.Tensor %959, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_22, %indices_23 = torch.aten.max.dim %960, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%961 = torch.aten.sub.Tensor %960, %values_22, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%962 = torch.aten.exp %961 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%963 = torch.aten.sum.dim_IntList %962, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%964 = torch.aten.div.Tensor %962, %963 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%965 = torch.aten.broadcast_to %964, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%966 = torch.aten.view %965, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%967 = torch.aten.broadcast_to %952, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%968 = torch.aten.view %967, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%969 = torch.aten.bmm %966, %968 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%970 = torch.aten.view %969, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%971 = torch.aten.transpose.int %970, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%972 = torch.aten.clone %971, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%973 = torch.aten.view %972, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%974 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%975 = torch.aten.view %973, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%976 = torch.aten.mm %975, %974 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%977 = torch.aten.view %976, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%978 = torch.aten.add.Tensor %927, %977, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%979 = torch.aten.pow.Tensor_Scalar %978, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%980 = torch.aten.sum.dim_IntList %979, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%981 = torch.aten.div.Scalar %980, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%982 = torch.aten.add.Scalar %981, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%983 = torch.aten.rsqrt %982 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%984 = torch.aten.mul.Tensor %978, %983 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%985 = torch.aten.mul.Tensor %4, %984 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%986 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%987 = torch.aten.view %985, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%988 = torch.aten.mm %987, %986 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%989 = torch.aten.view %988, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%990 = torch.aten.view %989, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%991 = torch.aten.transpose.int %990, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%992 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%993 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%994 = torch.aten.mm %993, %992 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%995 = torch.aten.view %994, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%996 = torch.aten.view %995, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%997 = torch.aten.transpose.int %996, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%998 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%999 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%1000 = torch.aten.mm %999, %998 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%1001 = torch.aten.view %1000, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%1002 = torch.aten.view %1001, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%1003 = torch.aten.transpose.int %1002, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%1004 = torch.aten.transpose.int %997, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%1005 = torch.aten.broadcast_to %991, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1006 = torch.aten.view %1005, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1007 = torch.aten.broadcast_to %1004, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%1008 = torch.aten.view %1007, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%1009 = torch.aten.bmm %1006, %1008 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%1010 = torch.aten.view %1009, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1011 = torch.aten.add.Tensor %1010, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_24, %indices_25 = torch.aten.max.dim %1011, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%1012 = torch.aten.sub.Tensor %1011, %values_24, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%1013 = torch.aten.exp %1012 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1014 = torch.aten.sum.dim_IntList %1013, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%1015 = torch.aten.div.Tensor %1013, %1014 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1016 = torch.aten.broadcast_to %1015, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1017 = torch.aten.view %1016, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%1018 = torch.aten.broadcast_to %1003, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%1019 = torch.aten.view %1018, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%1020 = torch.aten.bmm %1017, %1019 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%1021 = torch.aten.view %1020, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1022 = torch.aten.transpose.int %1021, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1023 = torch.aten.clone %1022, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1024 = torch.aten.view %1023, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1025 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1026 = torch.aten.view %1024, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1027 = torch.aten.mm %1026, %1025 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1028 = torch.aten.view %1027, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1029 = torch.aten.add.Tensor %978, %1028, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1030 = torch.aten.pow.Tensor_Scalar %1029, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1031 = torch.aten.sum.dim_IntList %1030, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1032 = torch.aten.div.Scalar %1031, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1033 = torch.aten.add.Scalar %1032, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1034 = torch.aten.rsqrt %1033 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1035 = torch.aten.mul.Tensor %1029, %1034 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1036 = torch.aten.mul.Tensor %4, %1035 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1037 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%1038 = torch.aten.view %1036, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1039 = torch.aten.mm %1038, %1037 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%1040 = torch.aten.view %1039, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%1041 = torch.aten.relu %1040 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%1042 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%1043 = torch.aten.view %1041, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%1044 = torch.aten.mm %1043, %1042 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%1045 = torch.aten.view %1044, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1046 = torch.aten.add.Tensor %1029, %1045, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1047 = torch.aten.pow.Tensor_Scalar %1046, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1048 = torch.aten.sum.dim_IntList %1047, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1049 = torch.aten.div.Scalar %1048, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1050 = torch.aten.add.Scalar %1049, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1051 = torch.aten.rsqrt %1050 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1052 = torch.aten.mul.Tensor %1046, %1051 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1053 = torch.aten.mul.Tensor %4, %1052 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1054 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1055 = torch.aten.view %1053, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1056 = torch.aten.mm %1055, %1054 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1057 = torch.aten.view %1056, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1058 = torch.aten.view %1057, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1059 = torch.aten.transpose.int %1058, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1060 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1061 = torch.aten.view %1053, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1062 = torch.aten.mm %1061, %1060 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1063 = torch.aten.view %1062, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1064 = torch.aten.view %1063, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1065 = torch.aten.transpose.int %1064, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1066 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1067 = torch.aten.view %1053, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1068 = torch.aten.mm %1067, %1066 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1069 = torch.aten.view %1068, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1070 = torch.aten.view %1069, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1071 = torch.aten.transpose.int %1070, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1072 = torch.aten.transpose.int %1065, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%1073 = torch.aten.broadcast_to %1059, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1074 = torch.aten.view %1073, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1075 = torch.aten.broadcast_to %1072, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%1076 = torch.aten.view %1075, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%1077 = torch.aten.bmm %1074, %1076 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%1078 = torch.aten.view %1077, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%1079 = torch.aten.add.Tensor %1078, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_26, %indices_27 = torch.aten.max.dim %1079, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%1080 = torch.aten.sub.Tensor %1079, %values_26, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%1081 = torch.aten.exp %1080 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%1082 = torch.aten.sum.dim_IntList %1081, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%1083 = torch.aten.div.Tensor %1081, %1082 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%1084 = torch.aten.broadcast_to %1083, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%1085 = torch.aten.view %1084, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%1086 = torch.aten.broadcast_to %1071, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1087 = torch.aten.view %1086, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1088 = torch.aten.bmm %1085, %1087 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%1089 = torch.aten.view %1088, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1090 = torch.aten.transpose.int %1089, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1091 = torch.aten.clone %1090, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1092 = torch.aten.view %1091, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1093 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1094 = torch.aten.view %1092, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1095 = torch.aten.mm %1094, %1093 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1096 = torch.aten.view %1095, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1097 = torch.aten.add.Tensor %1046, %1096, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1098 = torch.aten.pow.Tensor_Scalar %1097, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1099 = torch.aten.sum.dim_IntList %1098, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1100 = torch.aten.div.Scalar %1099, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1101 = torch.aten.add.Scalar %1100, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1102 = torch.aten.rsqrt %1101 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1103 = torch.aten.mul.Tensor %1097, %1102 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1104 = torch.aten.mul.Tensor %4, %1103 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1105 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1106 = torch.aten.view %1104, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1107 = torch.aten.mm %1106, %1105 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1108 = torch.aten.view %1107, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1109 = torch.aten.view %1108, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1110 = torch.aten.transpose.int %1109, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1111 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1112 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%1113 = torch.aten.mm %1112, %1111 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%1114 = torch.aten.view %1113, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%1115 = torch.aten.view %1114, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%1116 = torch.aten.transpose.int %1115, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%1117 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1118 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%1119 = torch.aten.mm %1118, %1117 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%1120 = torch.aten.view %1119, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%1121 = torch.aten.view %1120, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%1122 = torch.aten.transpose.int %1121, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%1123 = torch.aten.transpose.int %1116, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%1124 = torch.aten.broadcast_to %1110, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1125 = torch.aten.view %1124, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1126 = torch.aten.broadcast_to %1123, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%1127 = torch.aten.view %1126, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%1128 = torch.aten.bmm %1125, %1127 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%1129 = torch.aten.view %1128, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1130 = torch.aten.add.Tensor %1129, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_28, %indices_29 = torch.aten.max.dim %1130, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%1131 = torch.aten.sub.Tensor %1130, %values_28, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%1132 = torch.aten.exp %1131 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1133 = torch.aten.sum.dim_IntList %1132, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%1134 = torch.aten.div.Tensor %1132, %1133 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1135 = torch.aten.broadcast_to %1134, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1136 = torch.aten.view %1135, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%1137 = torch.aten.broadcast_to %1122, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%1138 = torch.aten.view %1137, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%1139 = torch.aten.bmm %1136, %1138 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%1140 = torch.aten.view %1139, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1141 = torch.aten.transpose.int %1140, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1142 = torch.aten.clone %1141, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1143 = torch.aten.view %1142, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1144 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1145 = torch.aten.view %1143, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1146 = torch.aten.mm %1145, %1144 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1147 = torch.aten.view %1146, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1148 = torch.aten.add.Tensor %1097, %1147, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1149 = torch.aten.pow.Tensor_Scalar %1148, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1150 = torch.aten.sum.dim_IntList %1149, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1151 = torch.aten.div.Scalar %1150, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1152 = torch.aten.add.Scalar %1151, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1153 = torch.aten.rsqrt %1152 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1154 = torch.aten.mul.Tensor %1148, %1153 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1155 = torch.aten.mul.Tensor %4, %1154 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1156 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%1157 = torch.aten.view %1155, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1158 = torch.aten.mm %1157, %1156 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%1159 = torch.aten.view %1158, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%1160 = torch.aten.relu %1159 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%1161 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%1162 = torch.aten.view %1160, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%1163 = torch.aten.mm %1162, %1161 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%1164 = torch.aten.view %1163, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1165 = torch.aten.add.Tensor %1148, %1164, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1166 = torch.aten.pow.Tensor_Scalar %1165, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1167 = torch.aten.sum.dim_IntList %1166, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1168 = torch.aten.div.Scalar %1167, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1169 = torch.aten.add.Scalar %1168, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1170 = torch.aten.rsqrt %1169 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1171 = torch.aten.mul.Tensor %1165, %1170 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1172 = torch.aten.mul.Tensor %4, %1171 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1173 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1174 = torch.aten.view %1172, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1175 = torch.aten.mm %1174, %1173 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1176 = torch.aten.view %1175, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1177 = torch.aten.view %1176, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1178 = torch.aten.transpose.int %1177, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1179 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1180 = torch.aten.view %1172, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1181 = torch.aten.mm %1180, %1179 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1182 = torch.aten.view %1181, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1183 = torch.aten.view %1182, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1184 = torch.aten.transpose.int %1183, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1185 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1186 = torch.aten.view %1172, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1187 = torch.aten.mm %1186, %1185 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1188 = torch.aten.view %1187, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1189 = torch.aten.view %1188, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1190 = torch.aten.transpose.int %1189, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1191 = torch.aten.transpose.int %1184, %int3, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,4],f32>
%1192 = torch.aten.broadcast_to %1178, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1193 = torch.aten.view %1192, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1194 = torch.aten.broadcast_to %1191, %562 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,4],f32>
%1195 = torch.aten.view %1194, %564 : !torch.vtensor<[1,8,64,4],f32>, !torch.list<int> -> !torch.vtensor<[8,64,4],f32>
%1196 = torch.aten.bmm %1193, %1195 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,4],f32> -> !torch.vtensor<[8,4,4],f32>
%1197 = torch.aten.view %1196, %567 : !torch.vtensor<[8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%1198 = torch.aten.add.Tensor %1197, %595, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,4],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%values_30, %indices_31 = torch.aten.max.dim %1198, %int-1, %true : !torch.vtensor<[1,8,4,4],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%1199 = torch.aten.sub.Tensor %1198, %values_30, %int1 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,4],f32>
%1200 = torch.aten.exp %1199 : !torch.vtensor<[1,8,4,4],f32> -> !torch.vtensor<[1,8,4,4],f32>
%1201 = torch.aten.sum.dim_IntList %1200, %43, %true, %none : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%1202 = torch.aten.div.Tensor %1200, %1201 : !torch.vtensor<[1,8,4,4],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,4],f32>
%1203 = torch.aten.broadcast_to %1202, %567 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,4],f32>
%1204 = torch.aten.view %1203, %602 : !torch.vtensor<[1,8,4,4],f32>, !torch.list<int> -> !torch.vtensor<[8,4,4],f32>
%1205 = torch.aten.broadcast_to %1190, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1206 = torch.aten.view %1205, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1207 = torch.aten.bmm %1204, %1206 : !torch.vtensor<[8,4,4],f32>, !torch.vtensor<[8,4,64],f32> -> !torch.vtensor<[8,4,64],f32>
%1208 = torch.aten.view %1207, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1209 = torch.aten.transpose.int %1208, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1210 = torch.aten.clone %1209, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1211 = torch.aten.view %1210, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1212 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1213 = torch.aten.view %1211, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1214 = torch.aten.mm %1213, %1212 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1215 = torch.aten.view %1214, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1216 = torch.aten.add.Tensor %1165, %1215, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1217 = torch.aten.pow.Tensor_Scalar %1216, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1218 = torch.aten.sum.dim_IntList %1217, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1219 = torch.aten.div.Scalar %1218, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1220 = torch.aten.add.Scalar %1219, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1221 = torch.aten.rsqrt %1220 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1222 = torch.aten.mul.Tensor %1216, %1221 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1223 = torch.aten.mul.Tensor %4, %1222 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1224 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1225 = torch.aten.view %1223, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1226 = torch.aten.mm %1225, %1224 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1227 = torch.aten.view %1226, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1228 = torch.aten.view %1227, %56 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,8,64],f32>
%1229 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.vtensor<[1,4,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,4,64],f32>
%1230 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1231 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%1232 = torch.aten.mm %1231, %1230 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%1233 = torch.aten.view %1232, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%1234 = torch.aten.view %1233, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%1235 = torch.aten.transpose.int %1234, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%1236 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1237 = torch.aten.view %498, %51 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[15,512],f32>
%1238 = torch.aten.mm %1237, %1236 : !torch.vtensor<[15,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[15,512],f32>
%1239 = torch.aten.view %1238, %54 : !torch.vtensor<[15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,512],f32>
%1240 = torch.aten.view %1239, %56 : !torch.vtensor<[1,15,512],f32>, !torch.list<int> -> !torch.vtensor<[1,15,8,64],f32>
%1241 = torch.aten.transpose.int %1240, %int1, %int2 : !torch.vtensor<[1,15,8,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,15,64],f32>
%1242 = torch.aten.transpose.int %1235, %int3, %int2 : !torch.vtensor<[1,8,15,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,8,64,15],f32>
%1243 = torch.aten.broadcast_to %1229, %558 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1244 = torch.aten.view %1243, %560 : !torch.vtensor<[1,8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[8,4,64],f32>
%1245 = torch.aten.broadcast_to %1242, %76 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,64,15],f32>
%1246 = torch.aten.view %1245, %78 : !torch.vtensor<[1,8,64,15],f32>, !torch.list<int> -> !torch.vtensor<[8,64,15],f32>
%1247 = torch.aten.bmm %1244, %1246 : !torch.vtensor<[8,4,64],f32>, !torch.vtensor<[8,64,15],f32> -> !torch.vtensor<[8,4,15],f32>
%1248 = torch.aten.view %1247, %647 : !torch.vtensor<[8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1249 = torch.aten.add.Tensor %1248, %650, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,15],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%values_32, %indices_33 = torch.aten.max.dim %1249, %int-1, %true : !torch.vtensor<[1,8,4,15],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,8,4,1],f32>, !torch.vtensor<[1,8,4,1],si64>
%1250 = torch.aten.sub.Tensor %1249, %values_32, %int1 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32>, !torch.int -> !torch.vtensor<[1,8,4,15],f32>
%1251 = torch.aten.exp %1250 : !torch.vtensor<[1,8,4,15],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1252 = torch.aten.sum.dim_IntList %1251, %43, %true, %none : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,8,4,1],f32>
%1253 = torch.aten.div.Tensor %1251, %1252 : !torch.vtensor<[1,8,4,15],f32>, !torch.vtensor<[1,8,4,1],f32> -> !torch.vtensor<[1,8,4,15],f32>
%1254 = torch.aten.broadcast_to %1253, %647 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,15],f32>
%1255 = torch.aten.view %1254, %657 : !torch.vtensor<[1,8,4,15],f32>, !torch.list<int> -> !torch.vtensor<[8,4,15],f32>
%1256 = torch.aten.broadcast_to %1241, %72 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,15,64],f32>
%1257 = torch.aten.view %1256, %74 : !torch.vtensor<[1,8,15,64],f32>, !torch.list<int> -> !torch.vtensor<[8,15,64],f32>
%1258 = torch.aten.bmm %1255, %1257 : !torch.vtensor<[8,4,15],f32>, !torch.vtensor<[8,15,64],f32> -> !torch.vtensor<[8,4,64],f32>
%1259 = torch.aten.view %1258, %558 : !torch.vtensor<[8,4,64],f32>, !torch.list<int> -> !torch.vtensor<[1,8,4,64],f32>
%1260 = torch.aten.transpose.int %1259, %int1, %int2 : !torch.vtensor<[1,8,4,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1261 = torch.aten.clone %1260, %int0 : !torch.vtensor<[1,4,8,64],f32>, !torch.int -> !torch.vtensor<[1,4,8,64],f32>
%1262 = torch.aten.view %1261, %126 : !torch.vtensor<[1,4,8,64],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1263 = torch.aten.transpose.int %3, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%1264 = torch.aten.view %1262, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1265 = torch.aten.mm %1264, %1263 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4,512],f32>
%1266 = torch.aten.view %1265, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1267 = torch.aten.add.Tensor %1216, %1266, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1268 = torch.aten.pow.Tensor_Scalar %1267, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1269 = torch.aten.sum.dim_IntList %1268, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1270 = torch.aten.div.Scalar %1269, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1271 = torch.aten.add.Scalar %1270, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1272 = torch.aten.rsqrt %1271 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1273 = torch.aten.mul.Tensor %1267, %1272 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1274 = torch.aten.mul.Tensor %4, %1273 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1275 = torch.aten.transpose.int %1, %int0, %int1 : !torch.vtensor<[2048,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,2048],f32>
%1276 = torch.aten.view %1274, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1277 = torch.aten.mm %1276, %1275 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,2048],f32> -> !torch.vtensor<[4,2048],f32>
%1278 = torch.aten.view %1277, %681 : !torch.vtensor<[4,2048],f32>, !torch.list<int> -> !torch.vtensor<[1,4,2048],f32>
%1279 = torch.aten.relu %1278 : !torch.vtensor<[1,4,2048],f32> -> !torch.vtensor<[1,4,2048],f32>
%1280 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[512,2048],f32>, !torch.int, !torch.int -> !torch.vtensor<[2048,512],f32>
%1281 = torch.aten.view %1279, %685 : !torch.vtensor<[1,4,2048],f32>, !torch.list<int> -> !torch.vtensor<[4,2048],f32>
%1282 = torch.aten.mm %1281, %1280 : !torch.vtensor<[4,2048],f32>, !torch.vtensor<[2048,512],f32> -> !torch.vtensor<[4,512],f32>
%1283 = torch.aten.view %1282, %541 : !torch.vtensor<[4,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4,512],f32>
%1284 = torch.aten.add.Tensor %1267, %1283, %int1 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1285 = torch.aten.pow.Tensor_Scalar %1284, %int2 : !torch.vtensor<[1,4,512],f32>, !torch.int -> !torch.vtensor<[1,4,512],f32>
%1286 = torch.aten.sum.dim_IntList %1285, %43, %true, %none : !torch.vtensor<[1,4,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4,1],f32>
%1287 = torch.aten.div.Scalar %1286, %int512 : !torch.vtensor<[1,4,1],f32>, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1288 = torch.aten.add.Scalar %1287, %float9.999990e-07, %int1 : !torch.vtensor<[1,4,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,4,1],f32>
%1289 = torch.aten.rsqrt %1288 : !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,1],f32>
%1290 = torch.aten.mul.Tensor %1284, %1289 : !torch.vtensor<[1,4,512],f32>, !torch.vtensor<[1,4,1],f32> -> !torch.vtensor<[1,4,512],f32>
%1291 = torch.aten.mul.Tensor %4, %1290 : !torch.vtensor<[512],f32>, !torch.vtensor<[1,4,512],f32> -> !torch.vtensor<[1,4,512],f32>
%1292 = torch.aten.mul.Scalar %1291, %float4.419420e-02 : !torch.vtensor<[1,4,512],f32>, !torch.float -> !torch.vtensor<[1,4,512],f32>
%1293 = torch.aten.transpose.int %5, %int0, %int1 : !torch.vtensor<[32128,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,32128],f32>
%1294 = torch.aten.view %1292, %538 : !torch.vtensor<[1,4,512],f32>, !torch.list<int> -> !torch.vtensor<[4,512],f32>
%1295 = torch.aten.mm %1294, %1293 : !torch.vtensor<[4,512],f32>, !torch.vtensor<[512,32128],f32> -> !torch.vtensor<[4,32128],f32>
%1296 = torch.prim.ListConstruct %int1, %int4, %int32128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1297 = torch.aten.view %1295, %1296 : !torch.vtensor<[4,32128],f32>, !torch.list<int> -> !torch.vtensor<[1,4,32128],f32>
return %1297 : !torch.vtensor<[1,4,32128],f32>
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment