MiniLM sequence-classification IR (torch dialect) after canonicalization.
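For context, the sketch below shows one way a torch-dialect dump like this one is typically produced. It is an illustration under assumptions, not code from this gist: it assumes the torch-mlir Python bindings as of mid-2022, and minilm_model is a hypothetical torch.nn.Module wrapping the MiniLM sequence-classification model so that forward() returns a plain tensor.

import torch
import torch_mlir  # torch-mlir Python bindings (assumed API of mid-2022)

# Hypothetical fixed-shape int32 inputs matching the signature of @forward below.
input_ids      = torch.zeros(1, 128, dtype=torch.int32)
attention_mask = torch.zeros(1, 128, dtype=torch.int32)
token_type_ids = torch.zeros(1, 128, dtype=torch.int32)

# Import the model into the torch dialect (the dialect shown in this dump).
module = torch_mlir.compile(
    minilm_model,  # hypothetical wrapper module, not defined in this gist
    [input_ids, attention_mask, token_type_ids],
    output_type=torch_mlir.OutputType.TORCH,
)
print(module)

Canonicalization is then a matter of running the standard MLIR --canonicalize pass over the resulting module (for example with torch-mlir-opt), and large weight constants can be elided when printing (e.g. with -mlir-elide-elementsattrs-if-larger), which is why most literals below appear as opaque<"elided_large_const", "0xDEADBEEF">.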
module attributes {torch.debug_module_name = "MiniLMSequenceClassification"} {
  func.func @forward(%arg0: !torch.vtensor<[1,128],si32>, %arg1: !torch.vtensor<[1,128],si32>, %arg2: !torch.vtensor<[1,128],si32>) -> !torch.vtensor<[1,2],f32> {
    %int1 = torch.constant.int 1
    %none = torch.constant.none
    %true = torch.constant.bool true
    %float1.000000e00 = torch.constant.float 1.000000e+00
    %int128 = torch.constant.int 128
    %int0 = torch.constant.int 0
    %int9223372036854775807 = torch.constant.int 9223372036854775807
    %float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
    %int384 = torch.constant.int 384
    %str = torch.constant.str "none"
    %int-2 = torch.constant.int -2
    %int-1 = torch.constant.int -1
    %int3 = torch.constant.int 3
    %int2 = torch.constant.int 2
    %int32 = torch.constant.int 32
    %int12 = torch.constant.int 12
    %int6 = torch.constant.int 6
    %false = torch.constant.bool false
    %0 = torch.vtensor.literal(dense<-1.000000e+04> : tensor<f64>) : !torch.vtensor<[],f64>
    %1 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1x512xsi64>) : !torch.vtensor<[1,512],si64>
    %2 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<30522x384xf32>) : !torch.vtensor<[30522,384],f32>
    %3 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32>
    %4 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<512x384xf32>) : !torch.vtensor<[512,384],f32>
    %5 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %6 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %7 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %8 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %9 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %10 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %11 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %12 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %13 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %14 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %15 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %16 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %17 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %18 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %19 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %20 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %21 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %22 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %23 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %24 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %25 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %26 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %27 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %28 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %29 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %30 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %31 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %32 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %33 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %34 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %35 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %36 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %37 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %38 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %39 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %40 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %41 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %42 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %43 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %44 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %45 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %46 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %47 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %48 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %49 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %50 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %51 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %52 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %53 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %54 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %55 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %56 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %57 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %58 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %59 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %60 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %61 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %62 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %63 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %64 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %65 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %66 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %67 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %68 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %69 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %70 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %71 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %72 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %73 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %74 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %75 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %76 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %77 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %78 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %79 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %80 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %81 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %82 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %83 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %84 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %85 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %86 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %87 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %88 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %89 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %90 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %91 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %92 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %93 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %94 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %95 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %96 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %97 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %98 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %99 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %100 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %101 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %102 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %103 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %104 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %105 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %106 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %107 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %108 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %109 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %110 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %111 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %112 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %113 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %114 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %115 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %116 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %117 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %118 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %119 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %120 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %121 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %122 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %123 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %124 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %125 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %126 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %127 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %128 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %129 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %130 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %131 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %132 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %133 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %134 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %135 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %136 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %137 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %138 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %139 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %140 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %141 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %142 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %143 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %144 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %145 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %146 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %147 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %148 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %149 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %150 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %151 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %152 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %153 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %154 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %155 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %156 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %157 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %158 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %159 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %160 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %161 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %162 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %163 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %164 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %165 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %166 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %167 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %168 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %169 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %170 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %171 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %172 = torch.vtensor.literal(dense<5.6568542494923806> : tensor<f64>) : !torch.vtensor<[],f64>
    %173 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %174 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %175 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %176 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %177 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %178 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %179 = torch.vtensor.literal(dense<0.000000e+00> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %180 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %181 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %182 = torch.vtensor.literal(dense<0.455810547> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %183 = torch.vtensor.literal(dense<0.000000e+00> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %184 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %185 = torch.vtensor.literal(dense<0.000000e+00> : tensor<2xf32>) : !torch.vtensor<[2],f32>
    %186 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32>
    %187 = torch.aten.slice.Tensor %arg1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128],si32>
    %188 = torch.aten.unsqueeze %187, %int1 : !torch.vtensor<[1,128],si32>, !torch.int -> !torch.vtensor<[1,1,128],si32>
    %189 = torch.aten.unsqueeze %188, %int2 : !torch.vtensor<[1,1,128],si32>, !torch.int -> !torch.vtensor<[1,1,1,128],si32>
    %190 = torch.aten.slice.Tensor %189, %int3, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,1,128],si32>
    %191 = torch.aten.to.dtype %190, %int6, %false, %false, %none : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,1,1,128],f32>
    %192 = torch.aten.rsub.Scalar %191, %float1.000000e00, %int1 : !torch.vtensor<[1,1,1,128],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,128],f32>
    %193 = torch.aten.mul.Tensor %192, %0 : !torch.vtensor<[1,1,1,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,1,1,128],f32>
    %194 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %195 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %196 = torch.aten.slice.Tensor %1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,512],si64>
    %197 = torch.aten.slice.Tensor %196, %int1, %int0, %int128, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64>
    %198 = torch.aten.embedding %2, %arg0, %int0, %false, %false : !torch.vtensor<[30522,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32>
    %199 = torch.aten.embedding %3, %arg2, %int-1, %false, %false : !torch.vtensor<[2,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32>
    %200 = torch.aten.add.Tensor %198, %199, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %201 = torch.aten.embedding %4, %197, %int-1, %false, %false : !torch.vtensor<[512,384],f32>, !torch.vtensor<[1,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,?,384],f32>
    %202 = torch.aten.add.Tensor %200, %201, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,?,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %203 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int>
    %result0, %result1, %result2 = torch.aten.native_layer_norm %202, %203, %6, %5, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %204 = torch.aten.linear %result0, %8, %7 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %205 = torch.aten.linear %result0, %9, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %206 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %207 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %208 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %209 = torch.aten.view %205, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %210 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %211 = torch.aten.permute %209, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %212 = torch.aten.linear %result0, %11, %10 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %213 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %214 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %215 = torch.aten.view %212, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %216 = torch.aten.permute %215, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %217 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %218 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %219 = torch.aten.view %204, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %220 = torch.aten.permute %219, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %221 = torch.aten.transpose.int %211, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %222 = torch.aten.matmul %220, %221 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %223 = torch.aten.div.Tensor %222, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %224 = torch.aten.add.Tensor %223, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values, %indices = torch.aten.max.dim %224, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %225 = torch.aten.sub.Tensor %224, %values, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %226 = torch.aten.exp %225 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %227 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %228 = torch.aten.sum.dim_IntList %226, %227, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %229 = torch.aten.div.Tensor %226, %228 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %230 = torch.aten.matmul %229, %216 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %231 = torch.aten.permute %230, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %232 = torch.aten.contiguous %231, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %233 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %234 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %235 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %236 = torch.aten.view %232, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %237 = torch.aten.linear %236, %13, %12 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %238 = torch.aten.add.Tensor %237, %result0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %238, %203, %15, %14, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %239 = torch.aten.linear %result0_0, %17, %16 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %240 = torch.aten.gelu %239, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %241 = torch.aten.linear %240, %19, %18 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %242 = torch.aten.add.Tensor %241, %result0_0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %242, %203, %21, %20, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %243 = torch.aten.linear %result0_3, %23, %22 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %244 = torch.aten.linear %result0_3, %24, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %245 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %246 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %247 = torch.aten.view %244, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %248 = torch.aten.permute %247, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %249 = torch.aten.linear %result0_3, %26, %25 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %250 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %251 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %252 = torch.aten.view %249, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %253 = torch.aten.permute %252, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %254 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %255 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %256 = torch.aten.view %243, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %257 = torch.aten.permute %256, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %258 = torch.aten.transpose.int %248, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %259 = torch.aten.matmul %257, %258 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %260 = torch.aten.div.Tensor %259, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %261 = torch.aten.add.Tensor %260, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_6, %indices_7 = torch.aten.max.dim %261, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %262 = torch.aten.sub.Tensor %261, %values_6, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %263 = torch.aten.exp %262 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %264 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %265 = torch.aten.sum.dim_IntList %263, %264, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %266 = torch.aten.div.Tensor %263, %265 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %267 = torch.aten.matmul %266, %253 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %268 = torch.aten.permute %267, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %269 = torch.aten.contiguous %268, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %270 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %271 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %272 = torch.aten.view %269, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %273 = torch.aten.linear %272, %28, %27 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %274 = torch.aten.add.Tensor %273, %result0_3, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_8, %result1_9, %result2_10 = torch.aten.native_layer_norm %274, %203, %30, %29, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %275 = torch.aten.linear %result0_8, %32, %31 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %276 = torch.aten.gelu %275, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %277 = torch.aten.linear %276, %34, %33 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %278 = torch.aten.add.Tensor %277, %result0_8, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_11, %result1_12, %result2_13 = torch.aten.native_layer_norm %278, %203, %36, %35, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %279 = torch.aten.linear %result0_11, %38, %37 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %280 = torch.aten.linear %result0_11, %39, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %281 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %282 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %283 = torch.aten.view %280, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %284 = torch.aten.permute %283, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %285 = torch.aten.linear %result0_11, %41, %40 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %286 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %287 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %288 = torch.aten.view %285, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %289 = torch.aten.permute %288, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %290 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %291 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %292 = torch.aten.view %279, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %293 = torch.aten.permute %292, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %294 = torch.aten.transpose.int %284, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %295 = torch.aten.matmul %293, %294 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %296 = torch.aten.div.Tensor %295, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %297 = torch.aten.add.Tensor %296, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_14, %indices_15 = torch.aten.max.dim %297, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %298 = torch.aten.sub.Tensor %297, %values_14, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %299 = torch.aten.exp %298 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %300 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %301 = torch.aten.sum.dim_IntList %299, %300, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %302 = torch.aten.div.Tensor %299, %301 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %303 = torch.aten.matmul %302, %289 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %304 = torch.aten.permute %303, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %305 = torch.aten.contiguous %304, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %306 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %307 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %308 = torch.aten.view %305, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %309 = torch.aten.linear %308, %43, %42 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %310 = torch.aten.add.Tensor %309, %result0_11, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %310, %203, %45, %44, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %311 = torch.aten.linear %result0_16, %47, %46 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %312 = torch.aten.gelu %311, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %313 = torch.aten.linear %312, %49, %48 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %314 = torch.aten.add.Tensor %313, %result0_16, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %314, %203, %51, %50, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %315 = torch.aten.linear %result0_19, %53, %52 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %316 = torch.aten.linear %result0_19, %54, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %317 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %318 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %319 = torch.aten.view %316, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %320 = torch.aten.permute %319, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %321 = torch.aten.linear %result0_19, %56, %55 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %322 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %323 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %324 = torch.aten.view %321, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %325 = torch.aten.permute %324, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %326 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %327 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %328 = torch.aten.view %315, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %329 = torch.aten.permute %328, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %330 = torch.aten.transpose.int %320, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %331 = torch.aten.matmul %329, %330 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %332 = torch.aten.div.Tensor %331, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %333 = torch.aten.add.Tensor %332, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_22, %indices_23 = torch.aten.max.dim %333, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %334 = torch.aten.sub.Tensor %333, %values_22, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %335 = torch.aten.exp %334 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %336 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %337 = torch.aten.sum.dim_IntList %335, %336, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %338 = torch.aten.div.Tensor %335, %337 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %339 = torch.aten.matmul %338, %325 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %340 = torch.aten.permute %339, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %341 = torch.aten.contiguous %340, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %342 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %343 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %344 = torch.aten.view %341, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %345 = torch.aten.linear %344, %58, %57 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %346 = torch.aten.add.Tensor %345, %result0_19, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %346, %203, %60, %59, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %347 = torch.aten.linear %result0_24, %62, %61 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %348 = torch.aten.gelu %347, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %349 = torch.aten.linear %348, %64, %63 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %350 = torch.aten.add.Tensor %349, %result0_24, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %350, %203, %66, %65, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %351 = torch.aten.linear %result0_27, %68, %67 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %352 = torch.aten.linear %result0_27, %69, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %353 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %354 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %355 = torch.aten.view %352, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %356 = torch.aten.permute %355, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %357 = torch.aten.linear %result0_27, %71, %70 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %358 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %359 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %360 = torch.aten.view %357, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %361 = torch.aten.permute %360, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %362 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %363 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %364 = torch.aten.view %351, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %365 = torch.aten.permute %364, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %366 = torch.aten.transpose.int %356, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %367 = torch.aten.matmul %365, %366 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %368 = torch.aten.div.Tensor %367, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %369 = torch.aten.add.Tensor %368, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_30, %indices_31 = torch.aten.max.dim %369, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %370 = torch.aten.sub.Tensor %369, %values_30, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %371 = torch.aten.exp %370 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %372 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %373 = torch.aten.sum.dim_IntList %371, %372, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %374 = torch.aten.div.Tensor %371, %373 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %375 = torch.aten.matmul %374, %361 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %376 = torch.aten.permute %375, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %377 = torch.aten.contiguous %376, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %378 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %379 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %380 = torch.aten.view %377, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %381 = torch.aten.linear %380, %73, %72 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %382 = torch.aten.add.Tensor %381, %result0_27, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_32, %result1_33, %result2_34 = torch.aten.native_layer_norm %382, %203, %75, %74, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %383 = torch.aten.linear %result0_32, %77, %76 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %384 = torch.aten.gelu %383, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %385 = torch.aten.linear %384, %79, %78 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %386 = torch.aten.add.Tensor %385, %result0_32, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_35, %result1_36, %result2_37 = torch.aten.native_layer_norm %386, %203, %81, %80, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %387 = torch.aten.linear %result0_35, %83, %82 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %388 = torch.aten.linear %result0_35, %84, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %389 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %390 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %391 = torch.aten.view %388, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %392 = torch.aten.permute %391, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %393 = torch.aten.linear %result0_35, %86, %85 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %394 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %395 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %396 = torch.aten.view %393, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %397 = torch.aten.permute %396, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %398 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %399 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %400 = torch.aten.view %387, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %401 = torch.aten.permute %400, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %402 = torch.aten.transpose.int %392, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %403 = torch.aten.matmul %401, %402 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %404 = torch.aten.div.Tensor %403, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %405 = torch.aten.add.Tensor %404, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_38, %indices_39 = torch.aten.max.dim %405, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %406 = torch.aten.sub.Tensor %405, %values_38, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %407 = torch.aten.exp %406 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %408 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %409 = torch.aten.sum.dim_IntList %407, %408, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %410 = torch.aten.div.Tensor %407, %409 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %411 = torch.aten.matmul %410, %397 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %412 = torch.aten.permute %411, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %413 = torch.aten.contiguous %412, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
%414 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%415 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%416 = torch.aten.view %413, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%417 = torch.aten.linear %416, %88, %87 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%418 = torch.aten.add.Tensor %417, %result0_35, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %418, %203, %90, %89, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%419 = torch.aten.linear %result0_40, %92, %91 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%420 = torch.aten.gelu %419, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%421 = torch.aten.linear %420, %94, %93 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%422 = torch.aten.add.Tensor %421, %result0_40, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %422, %203, %96, %95, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
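    // Next encoder layer: same self-attention + feed-forward pattern as above.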
%423 = torch.aten.linear %result0_43, %98, %97 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%424 = torch.aten.linear %result0_43, %99, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%425 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%426 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%427 = torch.aten.view %424, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%428 = torch.aten.permute %427, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%429 = torch.aten.linear %result0_43, %101, %100 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%430 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%431 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%432 = torch.aten.view %429, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%433 = torch.aten.permute %432, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%434 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%435 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%436 = torch.aten.view %423, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%437 = torch.aten.permute %436, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%438 = torch.aten.transpose.int %428, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%439 = torch.aten.matmul %437, %438 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%440 = torch.aten.div.Tensor %439, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%441 = torch.aten.add.Tensor %440, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_46, %indices_47 = torch.aten.max.dim %441, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%442 = torch.aten.sub.Tensor %441, %values_46, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%443 = torch.aten.exp %442 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%444 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%445 = torch.aten.sum.dim_IntList %443, %444, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%446 = torch.aten.div.Tensor %443, %445 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%447 = torch.aten.matmul %446, %433 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%448 = torch.aten.permute %447, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%449 = torch.aten.contiguous %448, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%450 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%451 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%452 = torch.aten.view %449, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%453 = torch.aten.linear %452, %103, %102 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%454 = torch.aten.add.Tensor %453, %result0_43, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %454, %203, %105, %104, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%455 = torch.aten.linear %result0_48, %107, %106 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%456 = torch.aten.gelu %455, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%457 = torch.aten.linear %456, %109, %108 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%458 = torch.aten.add.Tensor %457, %result0_48, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %458, %203, %111, %110, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
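    // Next encoder layer: same self-attention + feed-forward pattern as above.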
%459 = torch.aten.linear %result0_51, %113, %112 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%460 = torch.aten.linear %result0_51, %114, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%461 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%462 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%463 = torch.aten.view %460, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%464 = torch.aten.permute %463, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%465 = torch.aten.linear %result0_51, %116, %115 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%466 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%467 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%468 = torch.aten.view %465, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%469 = torch.aten.permute %468, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%470 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%471 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%472 = torch.aten.view %459, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%473 = torch.aten.permute %472, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%474 = torch.aten.transpose.int %464, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%475 = torch.aten.matmul %473, %474 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%476 = torch.aten.div.Tensor %475, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%477 = torch.aten.add.Tensor %476, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_54, %indices_55 = torch.aten.max.dim %477, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%478 = torch.aten.sub.Tensor %477, %values_54, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%479 = torch.aten.exp %478 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%480 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%481 = torch.aten.sum.dim_IntList %479, %480, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%482 = torch.aten.div.Tensor %479, %481 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%483 = torch.aten.matmul %482, %469 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%484 = torch.aten.permute %483, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%485 = torch.aten.contiguous %484, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%486 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%487 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%488 = torch.aten.view %485, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%489 = torch.aten.linear %488, %118, %117 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%490 = torch.aten.add.Tensor %489, %result0_51, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_56, %result1_57, %result2_58 = torch.aten.native_layer_norm %490, %203, %120, %119, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%491 = torch.aten.linear %result0_56, %122, %121 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%492 = torch.aten.gelu %491, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%493 = torch.aten.linear %492, %124, %123 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%494 = torch.aten.add.Tensor %493, %result0_56, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_59, %result1_60, %result2_61 = torch.aten.native_layer_norm %494, %203, %126, %125, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
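    // Next encoder layer: same self-attention + feed-forward pattern as above.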
%495 = torch.aten.linear %result0_59, %128, %127 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%496 = torch.aten.linear %result0_59, %129, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%497 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%498 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%499 = torch.aten.view %496, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%500 = torch.aten.permute %499, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%501 = torch.aten.linear %result0_59, %131, %130 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%502 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%503 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%504 = torch.aten.view %501, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%505 = torch.aten.permute %504, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%506 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%507 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%508 = torch.aten.view %495, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%509 = torch.aten.permute %508, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%510 = torch.aten.transpose.int %500, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%511 = torch.aten.matmul %509, %510 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%512 = torch.aten.div.Tensor %511, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%513 = torch.aten.add.Tensor %512, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_62, %indices_63 = torch.aten.max.dim %513, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%514 = torch.aten.sub.Tensor %513, %values_62, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%515 = torch.aten.exp %514 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%516 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%517 = torch.aten.sum.dim_IntList %515, %516, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%518 = torch.aten.div.Tensor %515, %517 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%519 = torch.aten.matmul %518, %505 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%520 = torch.aten.permute %519, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%521 = torch.aten.contiguous %520, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%522 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%523 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%524 = torch.aten.view %521, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%525 = torch.aten.linear %524, %133, %132 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%526 = torch.aten.add.Tensor %525, %result0_59, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %526, %203, %135, %134, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%527 = torch.aten.linear %result0_64, %137, %136 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%528 = torch.aten.gelu %527, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%529 = torch.aten.linear %528, %139, %138 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%530 = torch.aten.add.Tensor %529, %result0_64, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %530, %203, %141, %140, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
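    // Next encoder layer: same self-attention + feed-forward pattern as above.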
%531 = torch.aten.linear %result0_67, %143, %142 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%532 = torch.aten.linear %result0_67, %144, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%533 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%534 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%535 = torch.aten.view %532, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%536 = torch.aten.permute %535, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%537 = torch.aten.linear %result0_67, %146, %145 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%538 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%539 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%540 = torch.aten.view %537, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%541 = torch.aten.permute %540, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%542 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%543 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%544 = torch.aten.view %531, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%545 = torch.aten.permute %544, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%546 = torch.aten.transpose.int %536, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%547 = torch.aten.matmul %545, %546 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%548 = torch.aten.div.Tensor %547, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%549 = torch.aten.add.Tensor %548, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_70, %indices_71 = torch.aten.max.dim %549, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%550 = torch.aten.sub.Tensor %549, %values_70, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%551 = torch.aten.exp %550 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%552 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%553 = torch.aten.sum.dim_IntList %551, %552, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%554 = torch.aten.div.Tensor %551, %553 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%555 = torch.aten.matmul %554, %541 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%556 = torch.aten.permute %555, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%557 = torch.aten.contiguous %556, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%558 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%559 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%560 = torch.aten.view %557, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%561 = torch.aten.linear %560, %148, %147 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%562 = torch.aten.add.Tensor %561, %result0_67, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %562, %203, %150, %149, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%563 = torch.aten.linear %result0_72, %152, %151 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%564 = torch.aten.gelu %563, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%565 = torch.aten.linear %564, %154, %153 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%566 = torch.aten.add.Tensor %565, %result0_72, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_75, %result1_76, %result2_77 = torch.aten.native_layer_norm %566, %203, %156, %155, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
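    // Next encoder layer: same self-attention + feed-forward pattern as above.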
%567 = torch.aten.linear %result0_75, %158, %157 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%568 = torch.aten.linear %result0_75, %159, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%569 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%570 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%571 = torch.aten.view %568, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%572 = torch.aten.permute %571, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%573 = torch.aten.linear %result0_75, %161, %160 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%574 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%575 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%576 = torch.aten.view %573, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%577 = torch.aten.permute %576, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%578 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%579 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%580 = torch.aten.view %567, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%581 = torch.aten.permute %580, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%582 = torch.aten.transpose.int %572, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%583 = torch.aten.matmul %581, %582 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%584 = torch.aten.div.Tensor %583, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%585 = torch.aten.add.Tensor %584, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_78, %indices_79 = torch.aten.max.dim %585, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%586 = torch.aten.sub.Tensor %585, %values_78, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%587 = torch.aten.exp %586 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%588 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%589 = torch.aten.sum.dim_IntList %587, %588, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%590 = torch.aten.div.Tensor %587, %589 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%591 = torch.aten.matmul %590, %577 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%592 = torch.aten.permute %591, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%593 = torch.aten.contiguous %592, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%594 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%595 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%596 = torch.aten.view %593, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%597 = torch.aten.linear %596, %163, %162 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%598 = torch.aten.add.Tensor %597, %result0_75, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_80, %result1_81, %result2_82 = torch.aten.native_layer_norm %598, %203, %165, %164, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%599 = torch.aten.linear %result0_80, %167, %166 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%600 = torch.aten.gelu %599, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%601 = torch.aten.linear %600, %169, %168 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%602 = torch.aten.add.Tensor %601, %result0_80, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_83, %result1_84, %result2_85 = torch.aten.native_layer_norm %602, %203, %171, %170, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
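    // Final encoder layer: same self-attention + feed-forward pattern; its layer-norm output (%result0_91) feeds the pooler below.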
%603 = torch.aten.linear %result0_83, %174, %173 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%604 = torch.aten.linear %result0_83, %175, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%605 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%606 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%607 = torch.aten.view %604, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%608 = torch.aten.permute %607, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%609 = torch.aten.linear %result0_83, %177, %176 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%610 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%611 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%612 = torch.aten.view %609, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%613 = torch.aten.permute %612, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%614 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%615 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%616 = torch.aten.view %603, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%617 = torch.aten.permute %616, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%618 = torch.aten.transpose.int %608, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%619 = torch.aten.matmul %617, %618 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%620 = torch.aten.div.Tensor %619, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%621 = torch.aten.add.Tensor %620, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_86, %indices_87 = torch.aten.max.dim %621, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%622 = torch.aten.sub.Tensor %621, %values_86, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%623 = torch.aten.exp %622 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%624 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%625 = torch.aten.sum.dim_IntList %623, %624, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%626 = torch.aten.div.Tensor %623, %625 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%627 = torch.aten.matmul %626, %613 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%628 = torch.aten.permute %627, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%629 = torch.aten.contiguous %628, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%630 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%631 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%632 = torch.aten.view %629, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%633 = torch.aten.linear %632, %178, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%634 = torch.aten.add.Tensor %633, %result0_83, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_88, %result1_89, %result2_90 = torch.aten.native_layer_norm %634, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%635 = torch.aten.linear %result0_88, %180, %179 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%636 = torch.aten.gelu %635, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%637 = torch.aten.linear %636, %181, %183 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%638 = torch.aten.add.Tensor %637, %result0_88, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_91, %result1_92, %result2_93 = torch.aten.native_layer_norm %638, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
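    // Pooler and classification head: slice out the first (typically [CLS]) token of the sequence, apply a dense layer with tanh, then project to the 2 output logits.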
%639 = torch.aten.slice.Tensor %result0_91, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%640 = torch.aten.slice.Tensor %639, %int1, %int0, %int1, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,384],f32> | |
%641 = torch.aten.squeeze.dim %640, %int1 : !torch.vtensor<[1,1,384],f32>, !torch.int -> !torch.vtensor<[1,384],f32> | |
%642 = torch.aten.linear %641, %184, %183 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,384],f32> | |
%643 = torch.aten.tanh %642 : !torch.vtensor<[1,384],f32> -> !torch.vtensor<[1,384],f32> | |
%644 = torch.aten.linear %643, %186, %185 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[2,384],f32>, !torch.vtensor<[2],f32> -> !torch.vtensor<[1,2],f32> | |
return %644 : !torch.vtensor<[1,2],f32> | |
} | |
} |