Created
June 22, 2022 13:24
-
-
Save pashu123/d167d247bac7fd3fffeb3f31db7f7819 to your computer and use it in GitHub Desktop.
MiniLM (MiniLMSequenceClassification) Torch-dialect IR after canonicalization.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module attributes {torch.debug_module_name = "MiniLMSequenceClassification"} { | |
| func.func @forward(%arg0: !torch.vtensor<[1,128],si32>, %arg1: !torch.vtensor<[1,128],si32>, %arg2: !torch.vtensor<[1,128],si32>) -> !torch.vtensor<[1,2],f32> { | |
| %int1 = torch.constant.int 1 | |
| %none = torch.constant.none | |
| %true = torch.constant.bool true | |
| %float1.000000e00 = torch.constant.float 1.000000e+00 | |
| %int128 = torch.constant.int 128 | |
| %int0 = torch.constant.int 0 | |
| %int9223372036854775807 = torch.constant.int 9223372036854775807 | |
| %float9.999990e-13 = torch.constant.float 9.9999999999999998E-13 | |
| %int384 = torch.constant.int 384 | |
| %str = torch.constant.str "none" | |
| %int-2 = torch.constant.int -2 | |
| %int-1 = torch.constant.int -1 | |
| %int3 = torch.constant.int 3 | |
| %int2 = torch.constant.int 2 | |
| %int32 = torch.constant.int 32 | |
| %int12 = torch.constant.int 12 | |
| %int6 = torch.constant.int 6 | |
| %false = torch.constant.bool false | |
| %0 = torch.vtensor.literal(dense<-1.000000e+04> : tensor<f64>) : !torch.vtensor<[],f64> | |
| %1 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1x512xsi64>) : !torch.vtensor<[1,512],si64> | |
| %2 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<30522x384xf32>) : !torch.vtensor<[30522,384],f32> | |
| %3 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32> | |
| %4 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<512x384xf32>) : !torch.vtensor<[512,384],f32> | |
| %5 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %6 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %7 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %8 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %9 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %10 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %11 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %12 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %13 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %14 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %15 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %16 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %17 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %18 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %19 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %20 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %21 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %22 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %23 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %24 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %25 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %26 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %27 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %28 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %29 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %30 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %31 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %32 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %33 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %34 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %35 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %36 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %37 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %38 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %39 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %40 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %41 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %42 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %43 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %44 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %45 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %46 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %47 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %48 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %49 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %50 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %51 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %52 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %53 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %54 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %55 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %56 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %57 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %58 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %59 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %60 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %61 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %62 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %63 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %64 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %65 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %66 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %67 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %68 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %69 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %70 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %71 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %72 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %73 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %74 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %75 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %76 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %77 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %78 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %79 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %80 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %81 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %82 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %83 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %84 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %85 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %86 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %87 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %88 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %89 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %90 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %91 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %92 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %93 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %94 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %95 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %96 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %97 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %98 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %99 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %100 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %101 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %102 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %103 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %104 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %105 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %106 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %107 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %108 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %109 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %110 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %111 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %112 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %113 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %114 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %115 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %116 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %117 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %118 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %119 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %120 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %121 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %122 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %123 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %124 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %125 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %126 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %127 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %128 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %129 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %130 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %131 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %132 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %133 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %134 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %135 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %136 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %137 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %138 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %139 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %140 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %141 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %142 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %143 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %144 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %145 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %146 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %147 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %148 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %149 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %150 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %151 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %152 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %153 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %154 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %155 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %156 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %157 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %158 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %159 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %160 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %161 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %162 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %163 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %164 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %165 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %166 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %167 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %168 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %169 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %170 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %171 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %172 = torch.vtensor.literal(dense<5.6568542494923806> : tensor<f64>) : !torch.vtensor<[],f64> | |
| %173 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %174 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %175 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %176 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %177 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %178 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %179 = torch.vtensor.literal(dense<0.000000e+00> : tensor<1536xf32>) : !torch.vtensor<[1536],f32> | |
| %180 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32> | |
| %181 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32> | |
| %182 = torch.vtensor.literal(dense<0.455810547> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %183 = torch.vtensor.literal(dense<0.000000e+00> : tensor<384xf32>) : !torch.vtensor<[384],f32> | |
| %184 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32> | |
| %185 = torch.vtensor.literal(dense<0.000000e+00> : tensor<2xf32>) : !torch.vtensor<[2],f32> | |
| %186 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32> | |
| %187 = torch.aten.slice.Tensor %arg1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128],si32> | |
| %188 = torch.aten.unsqueeze %187, %int1 : !torch.vtensor<[1,128],si32>, !torch.int -> !torch.vtensor<[1,1,128],si32> | |
| %189 = torch.aten.unsqueeze %188, %int2 : !torch.vtensor<[1,1,128],si32>, !torch.int -> !torch.vtensor<[1,1,1,128],si32> | |
| %190 = torch.aten.slice.Tensor %189, %int3, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,1,128],si32> | |
| %191 = torch.aten.to.dtype %190, %int6, %false, %false, %none : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,1,1,128],f32> | |
| %192 = torch.aten.rsub.Scalar %191, %float1.000000e00, %int1 : !torch.vtensor<[1,1,1,128],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,128],f32> | |
| %193 = torch.aten.mul.Tensor %192, %0 : !torch.vtensor<[1,1,1,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,1,1,128],f32> | |
| %194 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %195 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %196 = torch.aten.slice.Tensor %1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,512],si64> | |
| %197 = torch.aten.slice.Tensor %196, %int1, %int0, %int128, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
| %198 = torch.aten.embedding %2, %arg0, %int0, %false, %false : !torch.vtensor<[30522,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32> | |
| %199 = torch.aten.embedding %3, %arg2, %int-1, %false, %false : !torch.vtensor<[2,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32> | |
| %200 = torch.aten.add.Tensor %198, %199, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %201 = torch.aten.embedding %4, %197, %int-1, %false, %false : !torch.vtensor<[512,384],f32>, !torch.vtensor<[1,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,?,384],f32> | |
| %202 = torch.aten.add.Tensor %200, %201, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,?,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %203 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
| %result0, %result1, %result2 = torch.aten.native_layer_norm %202, %203, %6, %5, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %204 = torch.aten.linear %result0, %8, %7 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %205 = torch.aten.linear %result0, %9, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %206 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %207 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %208 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
| %209 = torch.aten.view %205, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %210 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
| %211 = torch.aten.permute %209, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %212 = torch.aten.linear %result0, %11, %10 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %213 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %214 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %215 = torch.aten.view %212, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %216 = torch.aten.permute %215, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %217 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %218 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %219 = torch.aten.view %204, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %220 = torch.aten.permute %219, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %221 = torch.aten.transpose.int %211, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %222 = torch.aten.matmul %220, %221 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %223 = torch.aten.div.Tensor %222, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %224 = torch.aten.add.Tensor %223, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values, %indices = torch.aten.max.dim %224, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %225 = torch.aten.sub.Tensor %224, %values, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %226 = torch.aten.exp %225 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %227 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %228 = torch.aten.sum.dim_IntList %226, %227, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %229 = torch.aten.div.Tensor %226, %228 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %230 = torch.aten.matmul %229, %216 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %231 = torch.aten.permute %230, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %232 = torch.aten.contiguous %231, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %233 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %234 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %235 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
| %236 = torch.aten.view %232, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %237 = torch.aten.linear %236, %13, %12 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %238 = torch.aten.add.Tensor %237, %result0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %238, %203, %15, %14, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %239 = torch.aten.linear %result0_0, %17, %16 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %240 = torch.aten.gelu %239, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %241 = torch.aten.linear %240, %19, %18 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %242 = torch.aten.add.Tensor %241, %result0_0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %242, %203, %21, %20, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %243 = torch.aten.linear %result0_3, %23, %22 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %244 = torch.aten.linear %result0_3, %24, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %245 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %246 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %247 = torch.aten.view %244, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %248 = torch.aten.permute %247, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %249 = torch.aten.linear %result0_3, %26, %25 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %250 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %251 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %252 = torch.aten.view %249, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %253 = torch.aten.permute %252, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %254 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %255 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %256 = torch.aten.view %243, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %257 = torch.aten.permute %256, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %258 = torch.aten.transpose.int %248, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %259 = torch.aten.matmul %257, %258 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %260 = torch.aten.div.Tensor %259, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %261 = torch.aten.add.Tensor %260, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_6, %indices_7 = torch.aten.max.dim %261, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %262 = torch.aten.sub.Tensor %261, %values_6, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %263 = torch.aten.exp %262 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %264 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %265 = torch.aten.sum.dim_IntList %263, %264, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %266 = torch.aten.div.Tensor %263, %265 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %267 = torch.aten.matmul %266, %253 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %268 = torch.aten.permute %267, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %269 = torch.aten.contiguous %268, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %270 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %271 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %272 = torch.aten.view %269, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %273 = torch.aten.linear %272, %28, %27 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %274 = torch.aten.add.Tensor %273, %result0_3, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_8, %result1_9, %result2_10 = torch.aten.native_layer_norm %274, %203, %30, %29, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %275 = torch.aten.linear %result0_8, %32, %31 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %276 = torch.aten.gelu %275, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %277 = torch.aten.linear %276, %34, %33 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %278 = torch.aten.add.Tensor %277, %result0_8, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_11, %result1_12, %result2_13 = torch.aten.native_layer_norm %278, %203, %36, %35, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %279 = torch.aten.linear %result0_11, %38, %37 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %280 = torch.aten.linear %result0_11, %39, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %281 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %282 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %283 = torch.aten.view %280, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %284 = torch.aten.permute %283, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %285 = torch.aten.linear %result0_11, %41, %40 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %286 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %287 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %288 = torch.aten.view %285, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %289 = torch.aten.permute %288, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %290 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %291 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %292 = torch.aten.view %279, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %293 = torch.aten.permute %292, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %294 = torch.aten.transpose.int %284, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %295 = torch.aten.matmul %293, %294 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %296 = torch.aten.div.Tensor %295, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %297 = torch.aten.add.Tensor %296, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_14, %indices_15 = torch.aten.max.dim %297, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %298 = torch.aten.sub.Tensor %297, %values_14, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %299 = torch.aten.exp %298 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %300 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %301 = torch.aten.sum.dim_IntList %299, %300, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %302 = torch.aten.div.Tensor %299, %301 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %303 = torch.aten.matmul %302, %289 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %304 = torch.aten.permute %303, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %305 = torch.aten.contiguous %304, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %306 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %307 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %308 = torch.aten.view %305, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %309 = torch.aten.linear %308, %43, %42 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %310 = torch.aten.add.Tensor %309, %result0_11, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %310, %203, %45, %44, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %311 = torch.aten.linear %result0_16, %47, %46 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %312 = torch.aten.gelu %311, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %313 = torch.aten.linear %312, %49, %48 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %314 = torch.aten.add.Tensor %313, %result0_16, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %314, %203, %51, %50, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %315 = torch.aten.linear %result0_19, %53, %52 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %316 = torch.aten.linear %result0_19, %54, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %317 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %318 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %319 = torch.aten.view %316, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %320 = torch.aten.permute %319, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %321 = torch.aten.linear %result0_19, %56, %55 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %322 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %323 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %324 = torch.aten.view %321, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %325 = torch.aten.permute %324, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %326 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %327 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %328 = torch.aten.view %315, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %329 = torch.aten.permute %328, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %330 = torch.aten.transpose.int %320, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %331 = torch.aten.matmul %329, %330 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %332 = torch.aten.div.Tensor %331, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %333 = torch.aten.add.Tensor %332, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_22, %indices_23 = torch.aten.max.dim %333, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %334 = torch.aten.sub.Tensor %333, %values_22, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %335 = torch.aten.exp %334 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %336 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %337 = torch.aten.sum.dim_IntList %335, %336, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %338 = torch.aten.div.Tensor %335, %337 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %339 = torch.aten.matmul %338, %325 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %340 = torch.aten.permute %339, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %341 = torch.aten.contiguous %340, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %342 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %343 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %344 = torch.aten.view %341, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %345 = torch.aten.linear %344, %58, %57 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %346 = torch.aten.add.Tensor %345, %result0_19, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %346, %203, %60, %59, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %347 = torch.aten.linear %result0_24, %62, %61 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %348 = torch.aten.gelu %347, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %349 = torch.aten.linear %348, %64, %63 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %350 = torch.aten.add.Tensor %349, %result0_24, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %350, %203, %66, %65, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %351 = torch.aten.linear %result0_27, %68, %67 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %352 = torch.aten.linear %result0_27, %69, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %353 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %354 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %355 = torch.aten.view %352, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %356 = torch.aten.permute %355, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %357 = torch.aten.linear %result0_27, %71, %70 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %358 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %359 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %360 = torch.aten.view %357, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %361 = torch.aten.permute %360, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %362 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %363 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %364 = torch.aten.view %351, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %365 = torch.aten.permute %364, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %366 = torch.aten.transpose.int %356, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %367 = torch.aten.matmul %365, %366 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %368 = torch.aten.div.Tensor %367, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %369 = torch.aten.add.Tensor %368, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_30, %indices_31 = torch.aten.max.dim %369, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %370 = torch.aten.sub.Tensor %369, %values_30, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %371 = torch.aten.exp %370 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %372 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %373 = torch.aten.sum.dim_IntList %371, %372, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %374 = torch.aten.div.Tensor %371, %373 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %375 = torch.aten.matmul %374, %361 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %376 = torch.aten.permute %375, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %377 = torch.aten.contiguous %376, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %378 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %379 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %380 = torch.aten.view %377, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %381 = torch.aten.linear %380, %73, %72 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %382 = torch.aten.add.Tensor %381, %result0_27, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_32, %result1_33, %result2_34 = torch.aten.native_layer_norm %382, %203, %75, %74, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %383 = torch.aten.linear %result0_32, %77, %76 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %384 = torch.aten.gelu %383, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %385 = torch.aten.linear %384, %79, %78 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %386 = torch.aten.add.Tensor %385, %result0_32, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_35, %result1_36, %result2_37 = torch.aten.native_layer_norm %386, %203, %81, %80, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %387 = torch.aten.linear %result0_35, %83, %82 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %388 = torch.aten.linear %result0_35, %84, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %389 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %390 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %391 = torch.aten.view %388, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %392 = torch.aten.permute %391, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %393 = torch.aten.linear %result0_35, %86, %85 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %394 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %395 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %396 = torch.aten.view %393, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %397 = torch.aten.permute %396, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %398 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %399 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %400 = torch.aten.view %387, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %401 = torch.aten.permute %400, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %402 = torch.aten.transpose.int %392, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %403 = torch.aten.matmul %401, %402 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %404 = torch.aten.div.Tensor %403, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %405 = torch.aten.add.Tensor %404, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_38, %indices_39 = torch.aten.max.dim %405, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %406 = torch.aten.sub.Tensor %405, %values_38, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %407 = torch.aten.exp %406 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %408 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %409 = torch.aten.sum.dim_IntList %407, %408, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %410 = torch.aten.div.Tensor %407, %409 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %411 = torch.aten.matmul %410, %397 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %412 = torch.aten.permute %411, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %413 = torch.aten.contiguous %412, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %414 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %415 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %416 = torch.aten.view %413, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %417 = torch.aten.linear %416, %88, %87 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %418 = torch.aten.add.Tensor %417, %result0_35, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %418, %203, %90, %89, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %419 = torch.aten.linear %result0_40, %92, %91 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %420 = torch.aten.gelu %419, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %421 = torch.aten.linear %420, %94, %93 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %422 = torch.aten.add.Tensor %421, %result0_40, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %422, %203, %96, %95, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %423 = torch.aten.linear %result0_43, %98, %97 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %424 = torch.aten.linear %result0_43, %99, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %425 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %426 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %427 = torch.aten.view %424, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %428 = torch.aten.permute %427, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %429 = torch.aten.linear %result0_43, %101, %100 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %430 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %431 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %432 = torch.aten.view %429, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %433 = torch.aten.permute %432, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %434 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %435 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %436 = torch.aten.view %423, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %437 = torch.aten.permute %436, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %438 = torch.aten.transpose.int %428, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %439 = torch.aten.matmul %437, %438 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %440 = torch.aten.div.Tensor %439, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %441 = torch.aten.add.Tensor %440, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_46, %indices_47 = torch.aten.max.dim %441, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %442 = torch.aten.sub.Tensor %441, %values_46, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %443 = torch.aten.exp %442 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %444 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %445 = torch.aten.sum.dim_IntList %443, %444, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %446 = torch.aten.div.Tensor %443, %445 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %447 = torch.aten.matmul %446, %433 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %448 = torch.aten.permute %447, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %449 = torch.aten.contiguous %448, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %450 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %451 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %452 = torch.aten.view %449, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %453 = torch.aten.linear %452, %103, %102 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %454 = torch.aten.add.Tensor %453, %result0_43, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %454, %203, %105, %104, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %455 = torch.aten.linear %result0_48, %107, %106 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %456 = torch.aten.gelu %455, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %457 = torch.aten.linear %456, %109, %108 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %458 = torch.aten.add.Tensor %457, %result0_48, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %458, %203, %111, %110, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %459 = torch.aten.linear %result0_51, %113, %112 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %460 = torch.aten.linear %result0_51, %114, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %461 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %462 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %463 = torch.aten.view %460, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %464 = torch.aten.permute %463, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %465 = torch.aten.linear %result0_51, %116, %115 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %466 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %467 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %468 = torch.aten.view %465, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %469 = torch.aten.permute %468, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %470 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %471 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %472 = torch.aten.view %459, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %473 = torch.aten.permute %472, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %474 = torch.aten.transpose.int %464, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %475 = torch.aten.matmul %473, %474 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %476 = torch.aten.div.Tensor %475, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %477 = torch.aten.add.Tensor %476, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_54, %indices_55 = torch.aten.max.dim %477, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %478 = torch.aten.sub.Tensor %477, %values_54, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %479 = torch.aten.exp %478 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %480 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %481 = torch.aten.sum.dim_IntList %479, %480, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %482 = torch.aten.div.Tensor %479, %481 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %483 = torch.aten.matmul %482, %469 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %484 = torch.aten.permute %483, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %485 = torch.aten.contiguous %484, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %486 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %487 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %488 = torch.aten.view %485, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %489 = torch.aten.linear %488, %118, %117 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %490 = torch.aten.add.Tensor %489, %result0_51, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_56, %result1_57, %result2_58 = torch.aten.native_layer_norm %490, %203, %120, %119, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %491 = torch.aten.linear %result0_56, %122, %121 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %492 = torch.aten.gelu %491, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %493 = torch.aten.linear %492, %124, %123 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %494 = torch.aten.add.Tensor %493, %result0_56, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_59, %result1_60, %result2_61 = torch.aten.native_layer_norm %494, %203, %126, %125, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %495 = torch.aten.linear %result0_59, %128, %127 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %496 = torch.aten.linear %result0_59, %129, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %497 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %498 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %499 = torch.aten.view %496, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %500 = torch.aten.permute %499, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %501 = torch.aten.linear %result0_59, %131, %130 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %502 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %503 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %504 = torch.aten.view %501, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %505 = torch.aten.permute %504, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %506 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %507 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %508 = torch.aten.view %495, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %509 = torch.aten.permute %508, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %510 = torch.aten.transpose.int %500, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %511 = torch.aten.matmul %509, %510 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %512 = torch.aten.div.Tensor %511, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %513 = torch.aten.add.Tensor %512, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_62, %indices_63 = torch.aten.max.dim %513, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %514 = torch.aten.sub.Tensor %513, %values_62, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %515 = torch.aten.exp %514 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %516 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %517 = torch.aten.sum.dim_IntList %515, %516, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %518 = torch.aten.div.Tensor %515, %517 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %519 = torch.aten.matmul %518, %505 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %520 = torch.aten.permute %519, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %521 = torch.aten.contiguous %520, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %522 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %523 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %524 = torch.aten.view %521, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %525 = torch.aten.linear %524, %133, %132 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %526 = torch.aten.add.Tensor %525, %result0_59, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %526, %203, %135, %134, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %527 = torch.aten.linear %result0_64, %137, %136 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %528 = torch.aten.gelu %527, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %529 = torch.aten.linear %528, %139, %138 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %530 = torch.aten.add.Tensor %529, %result0_64, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %530, %203, %141, %140, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %531 = torch.aten.linear %result0_67, %143, %142 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %532 = torch.aten.linear %result0_67, %144, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %533 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %534 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %535 = torch.aten.view %532, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %536 = torch.aten.permute %535, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %537 = torch.aten.linear %result0_67, %146, %145 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %538 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %539 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %540 = torch.aten.view %537, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %541 = torch.aten.permute %540, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %542 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %543 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %544 = torch.aten.view %531, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %545 = torch.aten.permute %544, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %546 = torch.aten.transpose.int %536, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %547 = torch.aten.matmul %545, %546 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %548 = torch.aten.div.Tensor %547, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %549 = torch.aten.add.Tensor %548, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_70, %indices_71 = torch.aten.max.dim %549, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %550 = torch.aten.sub.Tensor %549, %values_70, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %551 = torch.aten.exp %550 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %552 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %553 = torch.aten.sum.dim_IntList %551, %552, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %554 = torch.aten.div.Tensor %551, %553 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %555 = torch.aten.matmul %554, %541 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %556 = torch.aten.permute %555, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %557 = torch.aten.contiguous %556, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %558 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %559 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %560 = torch.aten.view %557, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %561 = torch.aten.linear %560, %148, %147 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %562 = torch.aten.add.Tensor %561, %result0_67, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %562, %203, %150, %149, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %563 = torch.aten.linear %result0_72, %152, %151 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %564 = torch.aten.gelu %563, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %565 = torch.aten.linear %564, %154, %153 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %566 = torch.aten.add.Tensor %565, %result0_72, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_75, %result1_76, %result2_77 = torch.aten.native_layer_norm %566, %203, %156, %155, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %567 = torch.aten.linear %result0_75, %158, %157 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %568 = torch.aten.linear %result0_75, %159, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %569 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %570 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %571 = torch.aten.view %568, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %572 = torch.aten.permute %571, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %573 = torch.aten.linear %result0_75, %161, %160 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %574 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %575 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %576 = torch.aten.view %573, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %577 = torch.aten.permute %576, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %578 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %579 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %580 = torch.aten.view %567, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %581 = torch.aten.permute %580, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %582 = torch.aten.transpose.int %572, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %583 = torch.aten.matmul %581, %582 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %584 = torch.aten.div.Tensor %583, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %585 = torch.aten.add.Tensor %584, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_78, %indices_79 = torch.aten.max.dim %585, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %586 = torch.aten.sub.Tensor %585, %values_78, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %587 = torch.aten.exp %586 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %588 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %589 = torch.aten.sum.dim_IntList %587, %588, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %590 = torch.aten.div.Tensor %587, %589 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %591 = torch.aten.matmul %590, %577 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %592 = torch.aten.permute %591, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %593 = torch.aten.contiguous %592, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %594 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %595 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %596 = torch.aten.view %593, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %597 = torch.aten.linear %596, %163, %162 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %598 = torch.aten.add.Tensor %597, %result0_75, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_80, %result1_81, %result2_82 = torch.aten.native_layer_norm %598, %203, %165, %164, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %599 = torch.aten.linear %result0_80, %167, %166 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %600 = torch.aten.gelu %599, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %601 = torch.aten.linear %600, %169, %168 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %602 = torch.aten.add.Tensor %601, %result0_80, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_83, %result1_84, %result2_85 = torch.aten.native_layer_norm %602, %203, %171, %170, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %603 = torch.aten.linear %result0_83, %174, %173 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %604 = torch.aten.linear %result0_83, %175, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %605 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %606 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %607 = torch.aten.view %604, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %608 = torch.aten.permute %607, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %609 = torch.aten.linear %result0_83, %177, %176 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %610 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %611 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %612 = torch.aten.view %609, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %613 = torch.aten.permute %612, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %614 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %615 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %616 = torch.aten.view %603, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %617 = torch.aten.permute %616, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
| %618 = torch.aten.transpose.int %608, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
| %619 = torch.aten.matmul %617, %618 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %620 = torch.aten.div.Tensor %619, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
| %621 = torch.aten.add.Tensor %620, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
| %values_86, %indices_87 = torch.aten.max.dim %621, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
| %622 = torch.aten.sub.Tensor %621, %values_86, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
| %623 = torch.aten.exp %622 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %624 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
| %625 = torch.aten.sum.dim_IntList %623, %624, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
| %626 = torch.aten.div.Tensor %623, %625 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
| %627 = torch.aten.matmul %626, %613 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
| %628 = torch.aten.permute %627, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
| %629 = torch.aten.contiguous %628, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
| %630 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
| %631 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
| %632 = torch.aten.view %629, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
| %633 = torch.aten.linear %632, %178, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %634 = torch.aten.add.Tensor %633, %result0_83, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_88, %result1_89, %result2_90 = torch.aten.native_layer_norm %634, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %635 = torch.aten.linear %result0_88, %180, %179 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
| %636 = torch.aten.gelu %635, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
| %637 = torch.aten.linear %636, %181, %183 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
| %638 = torch.aten.add.Tensor %637, %result0_88, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %result0_91, %result1_92, %result2_93 = torch.aten.native_layer_norm %638, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
| %639 = torch.aten.slice.Tensor %result0_91, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
| %640 = torch.aten.slice.Tensor %639, %int1, %int0, %int1, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,384],f32> | |
| %641 = torch.aten.squeeze.dim %640, %int1 : !torch.vtensor<[1,1,384],f32>, !torch.int -> !torch.vtensor<[1,384],f32> | |
| %642 = torch.aten.linear %641, %184, %183 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,384],f32> | |
| %643 = torch.aten.tanh %642 : !torch.vtensor<[1,384],f32> -> !torch.vtensor<[1,384],f32> | |
| %644 = torch.aten.linear %643, %186, %185 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[2,384],f32>, !torch.vtensor<[2],f32> -> !torch.vtensor<[1,2],f32> | |
| return %644 : !torch.vtensor<[1,2],f32> | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment