Created December 19, 2022 07:08
-
-
Save AmosLewis/10f5f1d025a8ee63821859f81bac6861 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module attributes {torch.debug_module_name = "_lambda"} { | |
| func.func @forward(%arg0: tensor<1x128xi64>) -> tensor<1x2xf32> { | |
| %0 = "tosa.const"() {value = dense<[[65536, 512, 1]]> : tensor<1x3xi32>} : () -> tensor<1x3xi32> | |
| %1 = "tosa.const"() {value = dense_resource<__elided__> : tensor<2x768xf32>} : () -> tensor<2x768xf32> | |
| %2 = "tosa.const"() {value = dense_resource<__elided__> : tensor<768x768xf32>} : () -> tensor<768x768xf32> | |
| %3 = "tosa.const"() {value = dense_resource<__elided__> : tensor<768xf32>} : () -> tensor<768xf32> | |
| %4 = "tosa.const"() {value = dense_resource<__elided__> : tensor<768x3072xf32>} : () -> tensor<768x3072xf32> | |
| %5 = "tosa.const"() {value = dense_resource<__elided__> : tensor<3072xf32>} : () -> tensor<3072xf32> | |
| %6 = "tosa.const"() {value = dense_resource<__elided__> : tensor<3072x768xf32>} : () -> tensor<3072x768xf32> | |
| %7 = "tosa.const"() {value = dense<-3.40282347E+38> : tensor<f32>} : () -> tensor<f32> | |
| %8 = "tosa.const"() {value = dense_resource<__elided__> : tensor<512x768xf32>} : () -> tensor<512x768xf32> | |
| %9 = "tosa.const"() {value = dense_resource<__elided__> : tensor<128x128xsi64>} : () -> tensor<128x128xi64> | |
| %10 = "tosa.const"() {value = dense_resource<__elided__> : tensor<128100x768xf32>} : () -> tensor<128100x768xf32> | |
| %11 = "tosa.const"() {value = dense<7.680000e+02> : tensor<1xf32>} : () -> tensor<1xf32> | |
| %12 = "tosa.const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> | |
| %13 = "tosa.const"() {value = dense<[0, 2, 1, 3]> : tensor<4xi64>} : () -> tensor<4xi64> | |
| %14 = "tosa.const"() {value = dense<[0, 2, 1]> : tensor<3xi32>} : () -> tensor<3xi32> | |
| %15 = "tosa.const"() {value = dense<13.8564062> : tensor<f32>} : () -> tensor<f32> | |
| %16 = "tosa.const"() {value = dense<0> : tensor<1x128x128x1xi32>} : () -> tensor<1x128x128x1xi32> | |
| %17 = "tosa.const"() {value = dense<"tensor<1x128x128x1xi32>} : () -> tensor<1x128x128x1xi32> | |
| %18 = "tosa.const"() {value = dense<0> : tensor<1x1x128x128xi8>} : () -> tensor<1x1x128x128xi8> | |
| %19 = "tosa.const"() {value = dense<5.000000e-01> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %20 = "tosa.const"() {value = dense<7.810800e-02> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %21 = "tosa.const"() {value = dense<9.720000e-04> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %22 = "tosa.const"() {value = dense<2.303890e-01> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %23 = "tosa.const"() {value = dense<2.783930e-01> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %24 = "tosa.const"() {value = dense<0.707106769> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %25 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %26 = "tosa.const"() {value = dense<5.000000e-01> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %27 = "tosa.const"() {value = dense<7.810800e-02> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %28 = "tosa.const"() {value = dense<9.720000e-04> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %29 = "tosa.const"() {value = dense<2.303890e-01> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %30 = "tosa.const"() {value = dense<2.783930e-01> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %31 = "tosa.const"() {value = dense<0.707106769> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %32 = "tosa.const"() {value = dense<1.000000e+00> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %33 = "tosa.const"() {value = dense<0.000000e+00> : tensor<f32>} : () -> tensor<f32> | |
| %34 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %35 = "tosa.const"() {value = dense<256> : tensor<1x1x1x1xi32>} : () -> tensor<1x1x1x1xi32> | |
| %36 = "tosa.const"() {value = dense<1.000000e+00> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %37 = "tosa.const"() {value = dense<1.000000e-07> : tensor<1x1xf32>} : () -> tensor<1x1xf32> | |
| %38 = "tosa.const"() {value = dense<1.000000e-07> : tensor<1x1x1xf32>} : () -> tensor<1x1x1xf32> | |
| %39 = "tosa.const"() {value = dense<1.000000e+00> : tensor<1x1x1x128xf32>} : () -> tensor<1x1x1x128xf32> | |
| %40 = "tosa.const"() {value = dense<1.000000e+00> : tensor<1x1x128x1xf32>} : () -> tensor<1x1x128x1xf32> | |
| %41 = "tosa.reshape"(%10) {new_shape = [1, 128100, 768]} : (tensor<128100x768xf32>) -> tensor<1x128100x768xf32> | |
| %42 = "tosa.cast"(%arg0) : (tensor<1x128xi64>) -> tensor<1x128xi32> | |
| %43 = "tosa.gather"(%41, %42) : (tensor<1x128100x768xf32>, tensor<1x128xi32>) -> tensor<1x128x768xf32> | |
| %44 = "tosa.reciprocal"(%11) : (tensor<1xf32>) -> tensor<1xf32> | |
| %45 = "tosa.reduce_sum"(%43) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %46 = "tosa.reshape"(%44) {new_shape = [1, 1, 1]} : (tensor<1xf32>) -> tensor<1x1x1xf32> | |
| %47 = "tosa.mul"(%45, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %48 = "tosa.sub"(%43, %47) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %49 = "tosa.mul"(%48, %48) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %50 = "tosa.reduce_sum"(%49) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %51 = "tosa.mul"(%50, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %52 = "tosa.reshape"(%3) {new_shape = [1, 1, 768]} : (tensor<768xf32>) -> tensor<1x1x768xf32> | |
| %53 = "tosa.add"(%51, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %54 = "tosa.rsqrt"(%53) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %55 = "tosa.mul"(%48, %54) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %56 = "tosa.mul"(%55, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %57 = "tosa.add"(%56, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %58 = "tosa.mul"(%39, %40) {shift = 0 : i32} : (tensor<1x1x1x128xf32>, tensor<1x1x128x1xf32>) -> tensor<1x1x128x128xf32> | |
| %59 = "tosa.cast"(%58) : (tensor<1x1x128x128xf32>) -> tensor<1x1x128x128xi8> | |
| %60 = "tosa.reduce_sum"(%8) {axis = 1 : i64} : (tensor<512x768xf32>) -> tensor<512x1xf32> | |
| %61 = "tosa.reshape"(%44) {new_shape = [1, 1]} : (tensor<1xf32>) -> tensor<1x1xf32> | |
| %62 = "tosa.mul"(%60, %61) {shift = 0 : i32} : (tensor<512x1xf32>, tensor<1x1xf32>) -> tensor<512x1xf32> | |
| %63 = "tosa.sub"(%8, %62) : (tensor<512x768xf32>, tensor<512x1xf32>) -> tensor<512x768xf32> | |
| %64 = "tosa.mul"(%63, %63) {shift = 0 : i32} : (tensor<512x768xf32>, tensor<512x768xf32>) -> tensor<512x768xf32> | |
| %65 = "tosa.reduce_sum"(%64) {axis = 1 : i64} : (tensor<512x768xf32>) -> tensor<512x1xf32> | |
| %66 = "tosa.mul"(%65, %61) {shift = 0 : i32} : (tensor<512x1xf32>, tensor<1x1xf32>) -> tensor<512x1xf32> | |
| %67 = "tosa.reshape"(%3) {new_shape = [1, 768]} : (tensor<768xf32>) -> tensor<1x768xf32> | |
| %68 = "tosa.add"(%66, %37) : (tensor<512x1xf32>, tensor<1x1xf32>) -> tensor<512x1xf32> | |
| %69 = "tosa.rsqrt"(%68) : (tensor<512x1xf32>) -> tensor<512x1xf32> | |
| %70 = "tosa.mul"(%63, %69) {shift = 0 : i32} : (tensor<512x768xf32>, tensor<512x1xf32>) -> tensor<512x768xf32> | |
| %71 = "tosa.mul"(%70, %67) {shift = 0 : i32} : (tensor<512x768xf32>, tensor<1x768xf32>) -> tensor<512x768xf32> | |
| %72 = "tosa.add"(%71, %67) : (tensor<512x768xf32>, tensor<1x768xf32>) -> tensor<512x768xf32> | |
| %73 = "tosa.transpose"(%2, %12) : (tensor<768x768xf32>, tensor<2xi32>) -> tensor<768x768xf32> | |
| %74 = "tosa.reshape"(%73) {new_shape = [1, 768, 768]} : (tensor<768x768xf32>) -> tensor<1x768x768xf32> | |
| %75 = "tosa.matmul"(%57, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %76 = "tosa.reshape"(%75) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %77 = "tosa.add"(%67, %76) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %78 = "tosa.reshape"(%77) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %79 = "tosa.transpose"(%78, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %80 = "tosa.reshape"(%79) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %81 = "tosa.transpose"(%80, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %82 = "tosa.matmul"(%80, %81) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %83 = "tosa.reciprocal"(%15) : (tensor<f32>) -> tensor<f32> | |
| %84 = "tosa.reshape"(%83) {new_shape = [1, 1, 1]} : (tensor<f32>) -> tensor<1x1x1xf32> | |
| %85 = "tosa.mul"(%82, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %86 = "tosa.reshape"(%9) {new_shape = [1, 1, 128, 128]} : (tensor<128x128xi64>) -> tensor<1x1x128x128xi64> | |
| %87 = "tosa.reshape"(%72) {new_shape = [1, 512, 768]} : (tensor<512x768xf32>) -> tensor<1x512x768xf32> | |
| %88 = "tosa.matmul"(%87, %74) : (tensor<1x512x768xf32>, tensor<1x768x768xf32>) -> tensor<1x512x768xf32> | |
| %89 = "tosa.reshape"(%88) {new_shape = [512, 768]} : (tensor<1x512x768xf32>) -> tensor<512x768xf32> | |
| %90 = "tosa.add"(%67, %89) : (tensor<1x768xf32>, tensor<512x768xf32>) -> tensor<512x768xf32> | |
| %91 = "tosa.reshape"(%90) {new_shape = [1, 512, 12, -1]} : (tensor<512x768xf32>) -> tensor<1x512x12x64xf32> | |
| %92 = "tosa.transpose"(%91, %13) : (tensor<1x512x12x64xf32>, tensor<4xi64>) -> tensor<1x12x512x64xf32> | |
| %93 = "tosa.reshape"(%92) {new_shape = [12, 512, 64]} : (tensor<1x12x512x64xf32>) -> tensor<12x512x64xf32> | |
| %94 = "tosa.transpose"(%93, %14) : (tensor<12x512x64xf32>, tensor<3xi32>) -> tensor<12x64x512xf32> | |
| %95 = "tosa.matmul"(%80, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %96 = "tosa.cast"(%86) : (tensor<1x1x128x128xi64>) -> tensor<1x1x128x128xi32> | |
| %97 = "tosa.add"(%96, %35) : (tensor<1x1x128x128xi32>, tensor<1x1x1x1xi32>) -> tensor<1x1x128x128xi32> | |
| %98 = "tosa.cast"(%97) : (tensor<1x1x128x128xi32>) -> tensor<1x1x128x128xi64> | |
| %99 = "tosa.clamp"(%98) {max_fp = 5.110000e+02 : f32, max_int = 511 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x128x128xi64>) -> tensor<1x1x128x128xi64> | |
| %100 = "tosa.reshape"(%99) {new_shape = [1, 128, 128]} : (tensor<1x1x128x128xi64>) -> tensor<1x128x128xi64> | |
| %101 = "tosa.cast"(%100) : (tensor<1x128x128xi64>) -> tensor<1x128x128xi32> | |
| %102 = "tosa.reshape"(%101) {new_shape = [1, 128, 128, 1]} : (tensor<1x128x128xi32>) -> tensor<1x128x128x1xi32> | |
| %103 = "tosa.concat"(%16, %17, %102) {axis = 3 : i64} : (tensor<1x128x128x1xi32>, tensor<1x128x128x1xi32>, tensor<1x128x128x1xi32>) -> tensor<1x128x128x3xi32> | |
| %104 = "tosa.reshape"(%95) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %105 = "tosa.reshape"(%103) {new_shape = [16384, 3]} : (tensor<1x128x128x3xi32>) -> tensor<16384x3xi32> | |
| %106 = "tosa.mul"(%105, %0) {shift = 0 : i32} : (tensor<16384x3xi32>, tensor<1x3xi32>) -> tensor<16384x3xi32> | |
| %107 = "tosa.reduce_sum"(%106) {axis = 1 : i64} : (tensor<16384x3xi32>) -> tensor<16384x1xi32> | |
| %108 = "tosa.reshape"(%107) {new_shape = [1, 16384]} : (tensor<16384x1xi32>) -> tensor<1x16384xi32> | |
| %109 = "tosa.gather"(%104, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %110 = "tosa.reshape"(%109) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %111 = "tosa.mul"(%110, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %112 = "tosa.add"(%111, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %113 = "tosa.negate"(%86) : (tensor<1x1x128x128xi64>) -> tensor<1x1x128x128xi64> | |
| %114 = "tosa.cast"(%113) : (tensor<1x1x128x128xi64>) -> tensor<1x1x128x128xi32> | |
| %115 = "tosa.add"(%114, %35) : (tensor<1x1x128x128xi32>, tensor<1x1x1x1xi32>) -> tensor<1x1x128x128xi32> | |
| %116 = "tosa.cast"(%115) : (tensor<1x1x128x128xi32>) -> tensor<1x1x128x128xi64> | |
| %117 = "tosa.clamp"(%116) {max_fp = 5.110000e+02 : f32, max_int = 511 : i64, min_fp = 0.000000e+00 : f32, min_int = 0 : i64} : (tensor<1x1x128x128xi64>) -> tensor<1x1x128x128xi64> | |
| %118 = "tosa.reshape"(%117) {new_shape = [1, 128, 128]} : (tensor<1x1x128x128xi64>) -> tensor<1x128x128xi64> | |
| %119 = "tosa.cast"(%118) : (tensor<1x128x128xi64>) -> tensor<1x128x128xi32> | |
| %120 = "tosa.reshape"(%119) {new_shape = [1, 128, 128, 1]} : (tensor<1x128x128xi32>) -> tensor<1x128x128x1xi32> | |
| %121 = "tosa.concat"(%16, %17, %120) {axis = 3 : i64} : (tensor<1x128x128x1xi32>, tensor<1x128x128x1xi32>, tensor<1x128x128x1xi32>) -> tensor<1x128x128x3xi32> | |
| %122 = "tosa.reshape"(%121) {new_shape = [16384, 3]} : (tensor<1x128x128x3xi32>) -> tensor<16384x3xi32> | |
| %123 = "tosa.mul"(%122, %0) {shift = 0 : i32} : (tensor<16384x3xi32>, tensor<1x3xi32>) -> tensor<16384x3xi32> | |
| %124 = "tosa.reduce_sum"(%123) {axis = 1 : i64} : (tensor<16384x3xi32>) -> tensor<16384x1xi32> | |
| %125 = "tosa.reshape"(%124) {new_shape = [1, 16384]} : (tensor<16384x1xi32>) -> tensor<1x16384xi32> | |
| %126 = "tosa.gather"(%104, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %127 = "tosa.reshape"(%126) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %128 = "tosa.transpose"(%127, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %129 = "tosa.mul"(%128, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %130 = "tosa.add"(%112, %129) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %131 = "tosa.add"(%85, %130) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %132 = "tosa.reshape"(%131) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %133 = "tosa.equal"(%59, %18) : (tensor<1x1x128x128xi8>, tensor<1x1x128x128xi8>) -> tensor<1x1x128x128xi1> | |
| %134 = "tosa.logical_not"(%133) : (tensor<1x1x128x128xi1>) -> tensor<1x1x128x128xi1> | |
| %135 = "tosa.bitwise_not"(%134) : (tensor<1x1x128x128xi1>) -> tensor<1x1x128x128xi1> | |
| %136 = "tosa.select"(%135, %7, %132) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %137 = "tosa.reduce_max"(%136) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %138 = "tosa.sub"(%136, %137) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %139 = "tosa.exp"(%138) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %140 = "tosa.reduce_sum"(%139) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %141 = "tosa.reciprocal"(%140) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %142 = "tosa.mul"(%139, %141) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %143 = "tosa.select"(%135, %33, %142) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %144 = "tosa.reshape"(%143) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %145 = "tosa.matmul"(%144, %80) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %146 = "tosa.reshape"(%145) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %147 = "tosa.transpose"(%146, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %148 = "tosa.reshape"(%147) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %149 = "tosa.matmul"(%148, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %150 = "tosa.reshape"(%149) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %151 = "tosa.add"(%67, %150) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %152 = "tosa.reshape"(%151) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %153 = "tosa.add"(%152, %57) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %154 = "tosa.reduce_sum"(%153) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %155 = "tosa.mul"(%154, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %156 = "tosa.sub"(%153, %155) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %157 = "tosa.mul"(%156, %156) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %158 = "tosa.reduce_sum"(%157) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %159 = "tosa.mul"(%158, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %160 = "tosa.add"(%159, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %161 = "tosa.rsqrt"(%160) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %162 = "tosa.mul"(%156, %161) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %163 = "tosa.mul"(%162, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %164 = "tosa.add"(%163, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %165 = "tosa.transpose"(%6, %12) : (tensor<3072x768xf32>, tensor<2xi32>) -> tensor<768x3072xf32> | |
| %166 = "tosa.reshape"(%165) {new_shape = [1, 768, 3072]} : (tensor<768x3072xf32>) -> tensor<1x768x3072xf32> | |
| %167 = "tosa.matmul"(%164, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %168 = "tosa.reshape"(%167) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %169 = "tosa.reshape"(%5) {new_shape = [1, 3072]} : (tensor<3072xf32>) -> tensor<1x3072xf32> | |
| %170 = "tosa.add"(%169, %168) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %171 = "tosa.reshape"(%170) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %172 = "tosa.sub"(%171, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %173 = "tosa.mul"(%172, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %174 = "tosa.abs"(%173) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %175 = "tosa.mul"(%174, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %176 = "tosa.add"(%175, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %177 = "tosa.mul"(%174, %174) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %178 = "tosa.mul"(%177, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %179 = "tosa.add"(%176, %178) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %180 = "tosa.mul"(%177, %174) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %181 = "tosa.mul"(%180, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %182 = "tosa.add"(%179, %181) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %183 = "tosa.mul"(%180, %174) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %184 = "tosa.mul"(%183, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %185 = "tosa.add"(%182, %184) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %186 = "tosa.reciprocal"(%185) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %187 = "tosa.mul"(%186, %186) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %188 = "tosa.mul"(%187, %187) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %189 = "tosa.sub"(%32, %188) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %190 = "tosa.greater_equal"(%173, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %191 = "tosa.negate"(%189) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %192 = "tosa.select"(%190, %189, %191) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %193 = "tosa.add"(%192, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %194 = "tosa.mul"(%193, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %195 = "tosa.mul"(%171, %194) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %196 = "tosa.transpose"(%4, %12) : (tensor<768x3072xf32>, tensor<2xi32>) -> tensor<3072x768xf32> | |
| %197 = "tosa.reshape"(%196) {new_shape = [1, 3072, 768]} : (tensor<3072x768xf32>) -> tensor<1x3072x768xf32> | |
| %198 = "tosa.matmul"(%195, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %199 = "tosa.reshape"(%198) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %200 = "tosa.add"(%67, %199) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %201 = "tosa.reshape"(%200) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %202 = "tosa.add"(%201, %164) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %203 = "tosa.reduce_sum"(%202) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %204 = "tosa.mul"(%203, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %205 = "tosa.sub"(%202, %204) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %206 = "tosa.mul"(%205, %205) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %207 = "tosa.reduce_sum"(%206) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %208 = "tosa.mul"(%207, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %209 = "tosa.add"(%208, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %210 = "tosa.rsqrt"(%209) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %211 = "tosa.mul"(%205, %210) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %212 = "tosa.mul"(%211, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %213 = "tosa.add"(%212, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %214 = "tosa.matmul"(%213, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %215 = "tosa.reshape"(%214) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %216 = "tosa.add"(%67, %215) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %217 = "tosa.reshape"(%216) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %218 = "tosa.transpose"(%217, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %219 = "tosa.reshape"(%218) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %220 = "tosa.transpose"(%219, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %221 = "tosa.matmul"(%219, %220) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %222 = "tosa.mul"(%221, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %223 = "tosa.matmul"(%219, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %224 = "tosa.reshape"(%223) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %225 = "tosa.gather"(%224, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %226 = "tosa.reshape"(%225) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %227 = "tosa.mul"(%226, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %228 = "tosa.add"(%227, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %229 = "tosa.gather"(%224, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %230 = "tosa.reshape"(%229) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %231 = "tosa.transpose"(%230, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %232 = "tosa.mul"(%231, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %233 = "tosa.add"(%228, %232) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %234 = "tosa.add"(%222, %233) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %235 = "tosa.reshape"(%234) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %236 = "tosa.select"(%135, %7, %235) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %237 = "tosa.reduce_max"(%236) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %238 = "tosa.sub"(%236, %237) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %239 = "tosa.exp"(%238) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %240 = "tosa.reduce_sum"(%239) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %241 = "tosa.reciprocal"(%240) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %242 = "tosa.mul"(%239, %241) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %243 = "tosa.select"(%135, %33, %242) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %244 = "tosa.reshape"(%243) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %245 = "tosa.matmul"(%244, %219) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %246 = "tosa.reshape"(%245) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %247 = "tosa.transpose"(%246, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %248 = "tosa.reshape"(%247) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %249 = "tosa.matmul"(%248, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %250 = "tosa.reshape"(%249) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %251 = "tosa.add"(%67, %250) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %252 = "tosa.reshape"(%251) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %253 = "tosa.add"(%252, %213) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %254 = "tosa.reduce_sum"(%253) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %255 = "tosa.mul"(%254, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %256 = "tosa.sub"(%253, %255) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %257 = "tosa.mul"(%256, %256) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %258 = "tosa.reduce_sum"(%257) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %259 = "tosa.mul"(%258, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %260 = "tosa.add"(%259, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %261 = "tosa.rsqrt"(%260) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %262 = "tosa.mul"(%256, %261) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %263 = "tosa.mul"(%262, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %264 = "tosa.add"(%263, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %265 = "tosa.matmul"(%264, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %266 = "tosa.reshape"(%265) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %267 = "tosa.add"(%169, %266) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %268 = "tosa.reshape"(%267) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %269 = "tosa.sub"(%268, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %270 = "tosa.mul"(%269, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %271 = "tosa.abs"(%270) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %272 = "tosa.mul"(%271, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %273 = "tosa.add"(%272, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %274 = "tosa.mul"(%271, %271) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %275 = "tosa.mul"(%274, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %276 = "tosa.add"(%273, %275) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %277 = "tosa.mul"(%274, %271) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %278 = "tosa.mul"(%277, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %279 = "tosa.add"(%276, %278) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %280 = "tosa.mul"(%277, %271) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %281 = "tosa.mul"(%280, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %282 = "tosa.add"(%279, %281) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %283 = "tosa.reciprocal"(%282) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %284 = "tosa.mul"(%283, %283) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %285 = "tosa.mul"(%284, %284) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %286 = "tosa.sub"(%32, %285) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %287 = "tosa.greater_equal"(%270, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %288 = "tosa.negate"(%286) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %289 = "tosa.select"(%287, %286, %288) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %290 = "tosa.add"(%289, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %291 = "tosa.mul"(%290, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %292 = "tosa.mul"(%268, %291) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %293 = "tosa.matmul"(%292, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %294 = "tosa.reshape"(%293) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %295 = "tosa.add"(%67, %294) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %296 = "tosa.reshape"(%295) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %297 = "tosa.add"(%296, %264) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %298 = "tosa.reduce_sum"(%297) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %299 = "tosa.mul"(%298, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %300 = "tosa.sub"(%297, %299) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %301 = "tosa.mul"(%300, %300) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %302 = "tosa.reduce_sum"(%301) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %303 = "tosa.mul"(%302, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %304 = "tosa.add"(%303, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %305 = "tosa.rsqrt"(%304) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %306 = "tosa.mul"(%300, %305) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %307 = "tosa.mul"(%306, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %308 = "tosa.add"(%307, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %309 = "tosa.matmul"(%308, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %310 = "tosa.reshape"(%309) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %311 = "tosa.add"(%67, %310) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %312 = "tosa.reshape"(%311) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %313 = "tosa.transpose"(%312, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %314 = "tosa.reshape"(%313) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %315 = "tosa.transpose"(%314, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %316 = "tosa.matmul"(%314, %315) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %317 = "tosa.mul"(%316, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %318 = "tosa.matmul"(%314, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %319 = "tosa.reshape"(%318) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %320 = "tosa.gather"(%319, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %321 = "tosa.reshape"(%320) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %322 = "tosa.mul"(%321, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %323 = "tosa.add"(%322, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %324 = "tosa.gather"(%319, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %325 = "tosa.reshape"(%324) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %326 = "tosa.transpose"(%325, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %327 = "tosa.mul"(%326, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %328 = "tosa.add"(%323, %327) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %329 = "tosa.add"(%317, %328) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %330 = "tosa.reshape"(%329) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %331 = "tosa.select"(%135, %7, %330) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %332 = "tosa.reduce_max"(%331) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %333 = "tosa.sub"(%331, %332) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %334 = "tosa.exp"(%333) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %335 = "tosa.reduce_sum"(%334) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %336 = "tosa.reciprocal"(%335) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %337 = "tosa.mul"(%334, %336) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %338 = "tosa.select"(%135, %33, %337) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %339 = "tosa.reshape"(%338) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %340 = "tosa.matmul"(%339, %314) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %341 = "tosa.reshape"(%340) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %342 = "tosa.transpose"(%341, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %343 = "tosa.reshape"(%342) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %344 = "tosa.matmul"(%343, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %345 = "tosa.reshape"(%344) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %346 = "tosa.add"(%67, %345) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %347 = "tosa.reshape"(%346) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %348 = "tosa.add"(%347, %308) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %349 = "tosa.reduce_sum"(%348) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %350 = "tosa.mul"(%349, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %351 = "tosa.sub"(%348, %350) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %352 = "tosa.mul"(%351, %351) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %353 = "tosa.reduce_sum"(%352) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %354 = "tosa.mul"(%353, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %355 = "tosa.add"(%354, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %356 = "tosa.rsqrt"(%355) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %357 = "tosa.mul"(%351, %356) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %358 = "tosa.mul"(%357, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %359 = "tosa.add"(%358, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %360 = "tosa.matmul"(%359, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %361 = "tosa.reshape"(%360) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %362 = "tosa.add"(%169, %361) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %363 = "tosa.reshape"(%362) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %364 = "tosa.sub"(%363, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %365 = "tosa.mul"(%364, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %366 = "tosa.abs"(%365) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %367 = "tosa.mul"(%366, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %368 = "tosa.add"(%367, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %369 = "tosa.mul"(%366, %366) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %370 = "tosa.mul"(%369, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %371 = "tosa.add"(%368, %370) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %372 = "tosa.mul"(%369, %366) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %373 = "tosa.mul"(%372, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %374 = "tosa.add"(%371, %373) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %375 = "tosa.mul"(%372, %366) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %376 = "tosa.mul"(%375, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %377 = "tosa.add"(%374, %376) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %378 = "tosa.reciprocal"(%377) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %379 = "tosa.mul"(%378, %378) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %380 = "tosa.mul"(%379, %379) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %381 = "tosa.sub"(%32, %380) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %382 = "tosa.greater_equal"(%365, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %383 = "tosa.negate"(%381) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %384 = "tosa.select"(%382, %381, %383) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %385 = "tosa.add"(%384, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %386 = "tosa.mul"(%385, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %387 = "tosa.mul"(%363, %386) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %388 = "tosa.matmul"(%387, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %389 = "tosa.reshape"(%388) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %390 = "tosa.add"(%67, %389) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %391 = "tosa.reshape"(%390) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %392 = "tosa.add"(%391, %359) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %393 = "tosa.reduce_sum"(%392) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %394 = "tosa.mul"(%393, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %395 = "tosa.sub"(%392, %394) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %396 = "tosa.mul"(%395, %395) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %397 = "tosa.reduce_sum"(%396) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %398 = "tosa.mul"(%397, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %399 = "tosa.add"(%398, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %400 = "tosa.rsqrt"(%399) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %401 = "tosa.mul"(%395, %400) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %402 = "tosa.mul"(%401, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %403 = "tosa.add"(%402, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %404 = "tosa.matmul"(%403, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %405 = "tosa.reshape"(%404) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %406 = "tosa.add"(%67, %405) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %407 = "tosa.reshape"(%406) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %408 = "tosa.transpose"(%407, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %409 = "tosa.reshape"(%408) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %410 = "tosa.transpose"(%409, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %411 = "tosa.matmul"(%409, %410) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %412 = "tosa.mul"(%411, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %413 = "tosa.matmul"(%409, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %414 = "tosa.reshape"(%413) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %415 = "tosa.gather"(%414, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %416 = "tosa.reshape"(%415) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %417 = "tosa.mul"(%416, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %418 = "tosa.add"(%417, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %419 = "tosa.gather"(%414, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %420 = "tosa.reshape"(%419) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %421 = "tosa.transpose"(%420, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %422 = "tosa.mul"(%421, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %423 = "tosa.add"(%418, %422) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %424 = "tosa.add"(%412, %423) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %425 = "tosa.reshape"(%424) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %426 = "tosa.select"(%135, %7, %425) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %427 = "tosa.reduce_max"(%426) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %428 = "tosa.sub"(%426, %427) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %429 = "tosa.exp"(%428) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %430 = "tosa.reduce_sum"(%429) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %431 = "tosa.reciprocal"(%430) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %432 = "tosa.mul"(%429, %431) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %433 = "tosa.select"(%135, %33, %432) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %434 = "tosa.reshape"(%433) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %435 = "tosa.matmul"(%434, %409) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %436 = "tosa.reshape"(%435) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %437 = "tosa.transpose"(%436, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %438 = "tosa.reshape"(%437) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %439 = "tosa.matmul"(%438, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %440 = "tosa.reshape"(%439) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %441 = "tosa.add"(%67, %440) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %442 = "tosa.reshape"(%441) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %443 = "tosa.add"(%442, %403) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %444 = "tosa.reduce_sum"(%443) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %445 = "tosa.mul"(%444, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %446 = "tosa.sub"(%443, %445) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %447 = "tosa.mul"(%446, %446) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %448 = "tosa.reduce_sum"(%447) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %449 = "tosa.mul"(%448, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %450 = "tosa.add"(%449, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %451 = "tosa.rsqrt"(%450) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %452 = "tosa.mul"(%446, %451) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %453 = "tosa.mul"(%452, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %454 = "tosa.add"(%453, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %455 = "tosa.matmul"(%454, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %456 = "tosa.reshape"(%455) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %457 = "tosa.add"(%169, %456) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %458 = "tosa.reshape"(%457) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %459 = "tosa.sub"(%458, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %460 = "tosa.mul"(%459, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %461 = "tosa.abs"(%460) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %462 = "tosa.mul"(%461, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %463 = "tosa.add"(%462, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %464 = "tosa.mul"(%461, %461) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %465 = "tosa.mul"(%464, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %466 = "tosa.add"(%463, %465) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %467 = "tosa.mul"(%464, %461) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %468 = "tosa.mul"(%467, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %469 = "tosa.add"(%466, %468) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %470 = "tosa.mul"(%467, %461) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %471 = "tosa.mul"(%470, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %472 = "tosa.add"(%469, %471) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %473 = "tosa.reciprocal"(%472) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %474 = "tosa.mul"(%473, %473) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %475 = "tosa.mul"(%474, %474) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %476 = "tosa.sub"(%32, %475) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %477 = "tosa.greater_equal"(%460, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %478 = "tosa.negate"(%476) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %479 = "tosa.select"(%477, %476, %478) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %480 = "tosa.add"(%479, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %481 = "tosa.mul"(%480, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %482 = "tosa.mul"(%458, %481) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %483 = "tosa.matmul"(%482, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %484 = "tosa.reshape"(%483) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %485 = "tosa.add"(%67, %484) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %486 = "tosa.reshape"(%485) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %487 = "tosa.add"(%486, %454) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %488 = "tosa.reduce_sum"(%487) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %489 = "tosa.mul"(%488, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %490 = "tosa.sub"(%487, %489) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %491 = "tosa.mul"(%490, %490) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %492 = "tosa.reduce_sum"(%491) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %493 = "tosa.mul"(%492, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %494 = "tosa.add"(%493, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %495 = "tosa.rsqrt"(%494) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %496 = "tosa.mul"(%490, %495) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %497 = "tosa.mul"(%496, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %498 = "tosa.add"(%497, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %499 = "tosa.matmul"(%498, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %500 = "tosa.reshape"(%499) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %501 = "tosa.add"(%67, %500) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %502 = "tosa.reshape"(%501) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %503 = "tosa.transpose"(%502, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %504 = "tosa.reshape"(%503) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %505 = "tosa.transpose"(%504, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %506 = "tosa.matmul"(%504, %505) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %507 = "tosa.mul"(%506, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %508 = "tosa.matmul"(%504, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %509 = "tosa.reshape"(%508) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %510 = "tosa.gather"(%509, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %511 = "tosa.reshape"(%510) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %512 = "tosa.mul"(%511, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %513 = "tosa.add"(%512, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %514 = "tosa.gather"(%509, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %515 = "tosa.reshape"(%514) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %516 = "tosa.transpose"(%515, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %517 = "tosa.mul"(%516, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %518 = "tosa.add"(%513, %517) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %519 = "tosa.add"(%507, %518) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %520 = "tosa.reshape"(%519) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %521 = "tosa.select"(%135, %7, %520) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %522 = "tosa.reduce_max"(%521) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %523 = "tosa.sub"(%521, %522) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %524 = "tosa.exp"(%523) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %525 = "tosa.reduce_sum"(%524) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %526 = "tosa.reciprocal"(%525) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %527 = "tosa.mul"(%524, %526) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %528 = "tosa.select"(%135, %33, %527) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %529 = "tosa.reshape"(%528) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %530 = "tosa.matmul"(%529, %504) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %531 = "tosa.reshape"(%530) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %532 = "tosa.transpose"(%531, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %533 = "tosa.reshape"(%532) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %534 = "tosa.matmul"(%533, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %535 = "tosa.reshape"(%534) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %536 = "tosa.add"(%67, %535) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %537 = "tosa.reshape"(%536) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %538 = "tosa.add"(%537, %498) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %539 = "tosa.reduce_sum"(%538) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %540 = "tosa.mul"(%539, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %541 = "tosa.sub"(%538, %540) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %542 = "tosa.mul"(%541, %541) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %543 = "tosa.reduce_sum"(%542) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %544 = "tosa.mul"(%543, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %545 = "tosa.add"(%544, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %546 = "tosa.rsqrt"(%545) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %547 = "tosa.mul"(%541, %546) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %548 = "tosa.mul"(%547, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %549 = "tosa.add"(%548, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %550 = "tosa.matmul"(%549, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %551 = "tosa.reshape"(%550) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %552 = "tosa.add"(%169, %551) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %553 = "tosa.reshape"(%552) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %554 = "tosa.sub"(%553, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %555 = "tosa.mul"(%554, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %556 = "tosa.abs"(%555) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %557 = "tosa.mul"(%556, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %558 = "tosa.add"(%557, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %559 = "tosa.mul"(%556, %556) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %560 = "tosa.mul"(%559, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %561 = "tosa.add"(%558, %560) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %562 = "tosa.mul"(%559, %556) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %563 = "tosa.mul"(%562, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %564 = "tosa.add"(%561, %563) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %565 = "tosa.mul"(%562, %556) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %566 = "tosa.mul"(%565, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %567 = "tosa.add"(%564, %566) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %568 = "tosa.reciprocal"(%567) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %569 = "tosa.mul"(%568, %568) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %570 = "tosa.mul"(%569, %569) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %571 = "tosa.sub"(%32, %570) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %572 = "tosa.greater_equal"(%555, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %573 = "tosa.negate"(%571) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %574 = "tosa.select"(%572, %571, %573) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %575 = "tosa.add"(%574, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %576 = "tosa.mul"(%575, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %577 = "tosa.mul"(%553, %576) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %578 = "tosa.matmul"(%577, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %579 = "tosa.reshape"(%578) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %580 = "tosa.add"(%67, %579) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %581 = "tosa.reshape"(%580) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %582 = "tosa.add"(%581, %549) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %583 = "tosa.reduce_sum"(%582) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %584 = "tosa.mul"(%583, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %585 = "tosa.sub"(%582, %584) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %586 = "tosa.mul"(%585, %585) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %587 = "tosa.reduce_sum"(%586) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %588 = "tosa.mul"(%587, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %589 = "tosa.add"(%588, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %590 = "tosa.rsqrt"(%589) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %591 = "tosa.mul"(%585, %590) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %592 = "tosa.mul"(%591, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %593 = "tosa.add"(%592, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %594 = "tosa.matmul"(%593, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %595 = "tosa.reshape"(%594) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %596 = "tosa.add"(%67, %595) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %597 = "tosa.reshape"(%596) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %598 = "tosa.transpose"(%597, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %599 = "tosa.reshape"(%598) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %600 = "tosa.transpose"(%599, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %601 = "tosa.matmul"(%599, %600) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %602 = "tosa.mul"(%601, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %603 = "tosa.matmul"(%599, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %604 = "tosa.reshape"(%603) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %605 = "tosa.gather"(%604, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %606 = "tosa.reshape"(%605) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %607 = "tosa.mul"(%606, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %608 = "tosa.add"(%607, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %609 = "tosa.gather"(%604, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %610 = "tosa.reshape"(%609) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %611 = "tosa.transpose"(%610, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %612 = "tosa.mul"(%611, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %613 = "tosa.add"(%608, %612) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %614 = "tosa.add"(%602, %613) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %615 = "tosa.reshape"(%614) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %616 = "tosa.select"(%135, %7, %615) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %617 = "tosa.reduce_max"(%616) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %618 = "tosa.sub"(%616, %617) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %619 = "tosa.exp"(%618) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %620 = "tosa.reduce_sum"(%619) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %621 = "tosa.reciprocal"(%620) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %622 = "tosa.mul"(%619, %621) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %623 = "tosa.select"(%135, %33, %622) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %624 = "tosa.reshape"(%623) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %625 = "tosa.matmul"(%624, %599) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %626 = "tosa.reshape"(%625) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %627 = "tosa.transpose"(%626, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %628 = "tosa.reshape"(%627) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %629 = "tosa.matmul"(%628, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %630 = "tosa.reshape"(%629) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %631 = "tosa.add"(%67, %630) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %632 = "tosa.reshape"(%631) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %633 = "tosa.add"(%632, %593) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %634 = "tosa.reduce_sum"(%633) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %635 = "tosa.mul"(%634, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %636 = "tosa.sub"(%633, %635) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %637 = "tosa.mul"(%636, %636) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %638 = "tosa.reduce_sum"(%637) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %639 = "tosa.mul"(%638, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %640 = "tosa.add"(%639, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %641 = "tosa.rsqrt"(%640) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %642 = "tosa.mul"(%636, %641) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %643 = "tosa.mul"(%642, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %644 = "tosa.add"(%643, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %645 = "tosa.matmul"(%644, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %646 = "tosa.reshape"(%645) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %647 = "tosa.add"(%169, %646) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %648 = "tosa.reshape"(%647) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %649 = "tosa.sub"(%648, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %650 = "tosa.mul"(%649, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %651 = "tosa.abs"(%650) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %652 = "tosa.mul"(%651, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %653 = "tosa.add"(%652, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %654 = "tosa.mul"(%651, %651) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %655 = "tosa.mul"(%654, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %656 = "tosa.add"(%653, %655) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %657 = "tosa.mul"(%654, %651) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %658 = "tosa.mul"(%657, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %659 = "tosa.add"(%656, %658) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %660 = "tosa.mul"(%657, %651) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %661 = "tosa.mul"(%660, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %662 = "tosa.add"(%659, %661) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %663 = "tosa.reciprocal"(%662) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %664 = "tosa.mul"(%663, %663) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %665 = "tosa.mul"(%664, %664) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %666 = "tosa.sub"(%32, %665) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %667 = "tosa.greater_equal"(%650, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %668 = "tosa.negate"(%666) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %669 = "tosa.select"(%667, %666, %668) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %670 = "tosa.add"(%669, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %671 = "tosa.mul"(%670, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %672 = "tosa.mul"(%648, %671) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %673 = "tosa.matmul"(%672, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %674 = "tosa.reshape"(%673) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %675 = "tosa.add"(%67, %674) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %676 = "tosa.reshape"(%675) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %677 = "tosa.add"(%676, %644) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %678 = "tosa.reduce_sum"(%677) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %679 = "tosa.mul"(%678, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %680 = "tosa.sub"(%677, %679) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %681 = "tosa.mul"(%680, %680) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %682 = "tosa.reduce_sum"(%681) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %683 = "tosa.mul"(%682, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %684 = "tosa.add"(%683, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %685 = "tosa.rsqrt"(%684) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %686 = "tosa.mul"(%680, %685) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %687 = "tosa.mul"(%686, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %688 = "tosa.add"(%687, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %689 = "tosa.matmul"(%688, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %690 = "tosa.reshape"(%689) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %691 = "tosa.add"(%67, %690) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %692 = "tosa.reshape"(%691) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %693 = "tosa.transpose"(%692, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %694 = "tosa.reshape"(%693) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %695 = "tosa.transpose"(%694, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %696 = "tosa.matmul"(%694, %695) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %697 = "tosa.mul"(%696, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %698 = "tosa.matmul"(%694, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %699 = "tosa.reshape"(%698) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %700 = "tosa.gather"(%699, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %701 = "tosa.reshape"(%700) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %702 = "tosa.mul"(%701, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %703 = "tosa.add"(%702, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %704 = "tosa.gather"(%699, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %705 = "tosa.reshape"(%704) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %706 = "tosa.transpose"(%705, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %707 = "tosa.mul"(%706, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %708 = "tosa.add"(%703, %707) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %709 = "tosa.add"(%697, %708) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %710 = "tosa.reshape"(%709) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %711 = "tosa.select"(%135, %7, %710) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %712 = "tosa.reduce_max"(%711) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %713 = "tosa.sub"(%711, %712) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %714 = "tosa.exp"(%713) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %715 = "tosa.reduce_sum"(%714) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %716 = "tosa.reciprocal"(%715) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %717 = "tosa.mul"(%714, %716) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %718 = "tosa.select"(%135, %33, %717) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %719 = "tosa.reshape"(%718) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %720 = "tosa.matmul"(%719, %694) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %721 = "tosa.reshape"(%720) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %722 = "tosa.transpose"(%721, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %723 = "tosa.reshape"(%722) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %724 = "tosa.matmul"(%723, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %725 = "tosa.reshape"(%724) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %726 = "tosa.add"(%67, %725) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %727 = "tosa.reshape"(%726) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %728 = "tosa.add"(%727, %688) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %729 = "tosa.reduce_sum"(%728) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %730 = "tosa.mul"(%729, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %731 = "tosa.sub"(%728, %730) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %732 = "tosa.mul"(%731, %731) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %733 = "tosa.reduce_sum"(%732) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %734 = "tosa.mul"(%733, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %735 = "tosa.add"(%734, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %736 = "tosa.rsqrt"(%735) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %737 = "tosa.mul"(%731, %736) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %738 = "tosa.mul"(%737, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %739 = "tosa.add"(%738, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %740 = "tosa.matmul"(%739, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %741 = "tosa.reshape"(%740) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %742 = "tosa.add"(%169, %741) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %743 = "tosa.reshape"(%742) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %744 = "tosa.sub"(%743, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %745 = "tosa.mul"(%744, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %746 = "tosa.abs"(%745) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %747 = "tosa.mul"(%746, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %748 = "tosa.add"(%747, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %749 = "tosa.mul"(%746, %746) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %750 = "tosa.mul"(%749, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %751 = "tosa.add"(%748, %750) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %752 = "tosa.mul"(%749, %746) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %753 = "tosa.mul"(%752, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %754 = "tosa.add"(%751, %753) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %755 = "tosa.mul"(%752, %746) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %756 = "tosa.mul"(%755, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %757 = "tosa.add"(%754, %756) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %758 = "tosa.reciprocal"(%757) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %759 = "tosa.mul"(%758, %758) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %760 = "tosa.mul"(%759, %759) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %761 = "tosa.sub"(%32, %760) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %762 = "tosa.greater_equal"(%745, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %763 = "tosa.negate"(%761) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %764 = "tosa.select"(%762, %761, %763) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %765 = "tosa.add"(%764, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %766 = "tosa.mul"(%765, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %767 = "tosa.mul"(%743, %766) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %768 = "tosa.matmul"(%767, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %769 = "tosa.reshape"(%768) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %770 = "tosa.add"(%67, %769) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %771 = "tosa.reshape"(%770) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %772 = "tosa.add"(%771, %739) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %773 = "tosa.reduce_sum"(%772) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %774 = "tosa.mul"(%773, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %775 = "tosa.sub"(%772, %774) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %776 = "tosa.mul"(%775, %775) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %777 = "tosa.reduce_sum"(%776) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %778 = "tosa.mul"(%777, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %779 = "tosa.add"(%778, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %780 = "tosa.rsqrt"(%779) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %781 = "tosa.mul"(%775, %780) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %782 = "tosa.mul"(%781, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %783 = "tosa.add"(%782, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %784 = "tosa.matmul"(%783, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %785 = "tosa.reshape"(%784) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %786 = "tosa.add"(%67, %785) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %787 = "tosa.reshape"(%786) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %788 = "tosa.transpose"(%787, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %789 = "tosa.reshape"(%788) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %790 = "tosa.transpose"(%789, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %791 = "tosa.matmul"(%789, %790) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %792 = "tosa.mul"(%791, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %793 = "tosa.matmul"(%789, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %794 = "tosa.reshape"(%793) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %795 = "tosa.gather"(%794, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %796 = "tosa.reshape"(%795) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %797 = "tosa.mul"(%796, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %798 = "tosa.add"(%797, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %799 = "tosa.gather"(%794, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %800 = "tosa.reshape"(%799) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %801 = "tosa.transpose"(%800, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %802 = "tosa.mul"(%801, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %803 = "tosa.add"(%798, %802) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %804 = "tosa.add"(%792, %803) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %805 = "tosa.reshape"(%804) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %806 = "tosa.select"(%135, %7, %805) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %807 = "tosa.reduce_max"(%806) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %808 = "tosa.sub"(%806, %807) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %809 = "tosa.exp"(%808) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %810 = "tosa.reduce_sum"(%809) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %811 = "tosa.reciprocal"(%810) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %812 = "tosa.mul"(%809, %811) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %813 = "tosa.select"(%135, %33, %812) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %814 = "tosa.reshape"(%813) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %815 = "tosa.matmul"(%814, %789) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %816 = "tosa.reshape"(%815) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %817 = "tosa.transpose"(%816, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %818 = "tosa.reshape"(%817) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %819 = "tosa.matmul"(%818, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %820 = "tosa.reshape"(%819) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %821 = "tosa.add"(%67, %820) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %822 = "tosa.reshape"(%821) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %823 = "tosa.add"(%822, %783) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %824 = "tosa.reduce_sum"(%823) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %825 = "tosa.mul"(%824, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %826 = "tosa.sub"(%823, %825) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %827 = "tosa.mul"(%826, %826) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %828 = "tosa.reduce_sum"(%827) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %829 = "tosa.mul"(%828, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %830 = "tosa.add"(%829, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %831 = "tosa.rsqrt"(%830) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %832 = "tosa.mul"(%826, %831) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %833 = "tosa.mul"(%832, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %834 = "tosa.add"(%833, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %835 = "tosa.matmul"(%834, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %836 = "tosa.reshape"(%835) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %837 = "tosa.add"(%169, %836) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %838 = "tosa.reshape"(%837) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %839 = "tosa.sub"(%838, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %840 = "tosa.mul"(%839, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %841 = "tosa.abs"(%840) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %842 = "tosa.mul"(%841, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %843 = "tosa.add"(%842, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %844 = "tosa.mul"(%841, %841) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %845 = "tosa.mul"(%844, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %846 = "tosa.add"(%843, %845) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %847 = "tosa.mul"(%844, %841) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %848 = "tosa.mul"(%847, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %849 = "tosa.add"(%846, %848) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %850 = "tosa.mul"(%847, %841) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %851 = "tosa.mul"(%850, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %852 = "tosa.add"(%849, %851) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %853 = "tosa.reciprocal"(%852) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %854 = "tosa.mul"(%853, %853) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %855 = "tosa.mul"(%854, %854) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %856 = "tosa.sub"(%32, %855) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %857 = "tosa.greater_equal"(%840, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %858 = "tosa.negate"(%856) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %859 = "tosa.select"(%857, %856, %858) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %860 = "tosa.add"(%859, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %861 = "tosa.mul"(%860, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %862 = "tosa.mul"(%838, %861) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %863 = "tosa.matmul"(%862, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %864 = "tosa.reshape"(%863) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %865 = "tosa.add"(%67, %864) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %866 = "tosa.reshape"(%865) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %867 = "tosa.add"(%866, %834) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %868 = "tosa.reduce_sum"(%867) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %869 = "tosa.mul"(%868, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %870 = "tosa.sub"(%867, %869) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %871 = "tosa.mul"(%870, %870) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %872 = "tosa.reduce_sum"(%871) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %873 = "tosa.mul"(%872, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %874 = "tosa.add"(%873, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %875 = "tosa.rsqrt"(%874) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %876 = "tosa.mul"(%870, %875) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %877 = "tosa.mul"(%876, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %878 = "tosa.add"(%877, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %879 = "tosa.matmul"(%878, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %880 = "tosa.reshape"(%879) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %881 = "tosa.add"(%67, %880) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %882 = "tosa.reshape"(%881) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %883 = "tosa.transpose"(%882, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %884 = "tosa.reshape"(%883) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %885 = "tosa.transpose"(%884, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %886 = "tosa.matmul"(%884, %885) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %887 = "tosa.mul"(%886, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %888 = "tosa.matmul"(%884, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %889 = "tosa.reshape"(%888) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %890 = "tosa.gather"(%889, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %891 = "tosa.reshape"(%890) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %892 = "tosa.mul"(%891, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %893 = "tosa.add"(%892, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %894 = "tosa.gather"(%889, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %895 = "tosa.reshape"(%894) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %896 = "tosa.transpose"(%895, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %897 = "tosa.mul"(%896, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %898 = "tosa.add"(%893, %897) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %899 = "tosa.add"(%887, %898) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %900 = "tosa.reshape"(%899) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %901 = "tosa.select"(%135, %7, %900) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %902 = "tosa.reduce_max"(%901) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %903 = "tosa.sub"(%901, %902) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %904 = "tosa.exp"(%903) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %905 = "tosa.reduce_sum"(%904) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %906 = "tosa.reciprocal"(%905) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %907 = "tosa.mul"(%904, %906) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %908 = "tosa.select"(%135, %33, %907) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %909 = "tosa.reshape"(%908) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %910 = "tosa.matmul"(%909, %884) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %911 = "tosa.reshape"(%910) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %912 = "tosa.transpose"(%911, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %913 = "tosa.reshape"(%912) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %914 = "tosa.matmul"(%913, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %915 = "tosa.reshape"(%914) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %916 = "tosa.add"(%67, %915) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %917 = "tosa.reshape"(%916) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %918 = "tosa.add"(%917, %878) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %919 = "tosa.reduce_sum"(%918) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %920 = "tosa.mul"(%919, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %921 = "tosa.sub"(%918, %920) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %922 = "tosa.mul"(%921, %921) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %923 = "tosa.reduce_sum"(%922) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %924 = "tosa.mul"(%923, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %925 = "tosa.add"(%924, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %926 = "tosa.rsqrt"(%925) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %927 = "tosa.mul"(%921, %926) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %928 = "tosa.mul"(%927, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %929 = "tosa.add"(%928, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %930 = "tosa.matmul"(%929, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %931 = "tosa.reshape"(%930) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %932 = "tosa.add"(%169, %931) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %933 = "tosa.reshape"(%932) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %934 = "tosa.sub"(%933, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %935 = "tosa.mul"(%934, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %936 = "tosa.abs"(%935) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %937 = "tosa.mul"(%936, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %938 = "tosa.add"(%937, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %939 = "tosa.mul"(%936, %936) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %940 = "tosa.mul"(%939, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %941 = "tosa.add"(%938, %940) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %942 = "tosa.mul"(%939, %936) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %943 = "tosa.mul"(%942, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %944 = "tosa.add"(%941, %943) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %945 = "tosa.mul"(%942, %936) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %946 = "tosa.mul"(%945, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %947 = "tosa.add"(%944, %946) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %948 = "tosa.reciprocal"(%947) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %949 = "tosa.mul"(%948, %948) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %950 = "tosa.mul"(%949, %949) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %951 = "tosa.sub"(%32, %950) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %952 = "tosa.greater_equal"(%935, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %953 = "tosa.negate"(%951) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %954 = "tosa.select"(%952, %951, %953) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %955 = "tosa.add"(%954, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %956 = "tosa.mul"(%955, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %957 = "tosa.mul"(%933, %956) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %958 = "tosa.matmul"(%957, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %959 = "tosa.reshape"(%958) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %960 = "tosa.add"(%67, %959) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %961 = "tosa.reshape"(%960) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %962 = "tosa.add"(%961, %929) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %963 = "tosa.reduce_sum"(%962) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %964 = "tosa.mul"(%963, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %965 = "tosa.sub"(%962, %964) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %966 = "tosa.mul"(%965, %965) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %967 = "tosa.reduce_sum"(%966) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %968 = "tosa.mul"(%967, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %969 = "tosa.add"(%968, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %970 = "tosa.rsqrt"(%969) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %971 = "tosa.mul"(%965, %970) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %972 = "tosa.mul"(%971, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %973 = "tosa.add"(%972, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %974 = "tosa.matmul"(%973, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %975 = "tosa.reshape"(%974) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %976 = "tosa.add"(%67, %975) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %977 = "tosa.reshape"(%976) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %978 = "tosa.transpose"(%977, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %979 = "tosa.reshape"(%978) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %980 = "tosa.transpose"(%979, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %981 = "tosa.matmul"(%979, %980) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %982 = "tosa.mul"(%981, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %983 = "tosa.matmul"(%979, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %984 = "tosa.reshape"(%983) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %985 = "tosa.gather"(%984, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %986 = "tosa.reshape"(%985) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %987 = "tosa.mul"(%986, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %988 = "tosa.add"(%987, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %989 = "tosa.gather"(%984, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %990 = "tosa.reshape"(%989) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %991 = "tosa.transpose"(%990, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %992 = "tosa.mul"(%991, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %993 = "tosa.add"(%988, %992) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %994 = "tosa.add"(%982, %993) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %995 = "tosa.reshape"(%994) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %996 = "tosa.select"(%135, %7, %995) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %997 = "tosa.reduce_max"(%996) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %998 = "tosa.sub"(%996, %997) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %999 = "tosa.exp"(%998) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1000 = "tosa.reduce_sum"(%999) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %1001 = "tosa.reciprocal"(%1000) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %1002 = "tosa.mul"(%999, %1001) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %1003 = "tosa.select"(%135, %33, %1002) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1004 = "tosa.reshape"(%1003) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1005 = "tosa.matmul"(%1004, %979) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %1006 = "tosa.reshape"(%1005) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %1007 = "tosa.transpose"(%1006, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %1008 = "tosa.reshape"(%1007) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %1009 = "tosa.matmul"(%1008, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %1010 = "tosa.reshape"(%1009) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1011 = "tosa.add"(%67, %1010) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1012 = "tosa.reshape"(%1011) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1013 = "tosa.add"(%1012, %973) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1014 = "tosa.reduce_sum"(%1013) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1015 = "tosa.mul"(%1014, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1016 = "tosa.sub"(%1013, %1015) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1017 = "tosa.mul"(%1016, %1016) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1018 = "tosa.reduce_sum"(%1017) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1019 = "tosa.mul"(%1018, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1020 = "tosa.add"(%1019, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1021 = "tosa.rsqrt"(%1020) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1022 = "tosa.mul"(%1016, %1021) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1023 = "tosa.mul"(%1022, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1024 = "tosa.add"(%1023, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1025 = "tosa.matmul"(%1024, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1026 = "tosa.reshape"(%1025) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %1027 = "tosa.add"(%169, %1026) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %1028 = "tosa.reshape"(%1027) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1029 = "tosa.sub"(%1028, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1030 = "tosa.mul"(%1029, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1031 = "tosa.abs"(%1030) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1032 = "tosa.mul"(%1031, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1033 = "tosa.add"(%1032, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1034 = "tosa.mul"(%1031, %1031) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1035 = "tosa.mul"(%1034, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1036 = "tosa.add"(%1033, %1035) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1037 = "tosa.mul"(%1034, %1031) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1038 = "tosa.mul"(%1037, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1039 = "tosa.add"(%1036, %1038) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1040 = "tosa.mul"(%1037, %1031) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1041 = "tosa.mul"(%1040, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1042 = "tosa.add"(%1039, %1041) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1043 = "tosa.reciprocal"(%1042) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1044 = "tosa.mul"(%1043, %1043) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1045 = "tosa.mul"(%1044, %1044) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1046 = "tosa.sub"(%32, %1045) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1047 = "tosa.greater_equal"(%1030, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %1048 = "tosa.negate"(%1046) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1049 = "tosa.select"(%1047, %1046, %1048) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1050 = "tosa.add"(%1049, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1051 = "tosa.mul"(%1050, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1052 = "tosa.mul"(%1028, %1051) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1053 = "tosa.matmul"(%1052, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %1054 = "tosa.reshape"(%1053) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1055 = "tosa.add"(%67, %1054) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1056 = "tosa.reshape"(%1055) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1057 = "tosa.add"(%1056, %1024) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1058 = "tosa.reduce_sum"(%1057) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1059 = "tosa.mul"(%1058, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1060 = "tosa.sub"(%1057, %1059) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1061 = "tosa.mul"(%1060, %1060) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1062 = "tosa.reduce_sum"(%1061) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1063 = "tosa.mul"(%1062, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1064 = "tosa.add"(%1063, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1065 = "tosa.rsqrt"(%1064) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1066 = "tosa.mul"(%1060, %1065) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1067 = "tosa.mul"(%1066, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1068 = "tosa.add"(%1067, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1069 = "tosa.matmul"(%1068, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %1070 = "tosa.reshape"(%1069) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1071 = "tosa.add"(%67, %1070) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1072 = "tosa.reshape"(%1071) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %1073 = "tosa.transpose"(%1072, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %1074 = "tosa.reshape"(%1073) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %1075 = "tosa.transpose"(%1074, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %1076 = "tosa.matmul"(%1074, %1075) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %1077 = "tosa.mul"(%1076, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1078 = "tosa.matmul"(%1074, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %1079 = "tosa.reshape"(%1078) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %1080 = "tosa.gather"(%1079, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %1081 = "tosa.reshape"(%1080) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %1082 = "tosa.mul"(%1081, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1083 = "tosa.add"(%1082, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1084 = "tosa.gather"(%1079, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %1085 = "tosa.reshape"(%1084) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %1086 = "tosa.transpose"(%1085, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %1087 = "tosa.mul"(%1086, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1088 = "tosa.add"(%1083, %1087) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1089 = "tosa.add"(%1077, %1088) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1090 = "tosa.reshape"(%1089) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1091 = "tosa.select"(%135, %7, %1090) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1092 = "tosa.reduce_max"(%1091) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %1093 = "tosa.sub"(%1091, %1092) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %1094 = "tosa.exp"(%1093) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1095 = "tosa.reduce_sum"(%1094) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %1096 = "tosa.reciprocal"(%1095) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %1097 = "tosa.mul"(%1094, %1096) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %1098 = "tosa.select"(%135, %33, %1097) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1099 = "tosa.reshape"(%1098) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1100 = "tosa.matmul"(%1099, %1074) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %1101 = "tosa.reshape"(%1100) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %1102 = "tosa.transpose"(%1101, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %1103 = "tosa.reshape"(%1102) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %1104 = "tosa.matmul"(%1103, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %1105 = "tosa.reshape"(%1104) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1106 = "tosa.add"(%67, %1105) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1107 = "tosa.reshape"(%1106) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1108 = "tosa.add"(%1107, %1068) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1109 = "tosa.reduce_sum"(%1108) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1110 = "tosa.mul"(%1109, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1111 = "tosa.sub"(%1108, %1110) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1112 = "tosa.mul"(%1111, %1111) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1113 = "tosa.reduce_sum"(%1112) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1114 = "tosa.mul"(%1113, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1115 = "tosa.add"(%1114, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1116 = "tosa.rsqrt"(%1115) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1117 = "tosa.mul"(%1111, %1116) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1118 = "tosa.mul"(%1117, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1119 = "tosa.add"(%1118, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1120 = "tosa.matmul"(%1119, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1121 = "tosa.reshape"(%1120) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %1122 = "tosa.add"(%169, %1121) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %1123 = "tosa.reshape"(%1122) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1124 = "tosa.sub"(%1123, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1125 = "tosa.mul"(%1124, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1126 = "tosa.abs"(%1125) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1127 = "tosa.mul"(%1126, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1128 = "tosa.add"(%1127, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1129 = "tosa.mul"(%1126, %1126) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1130 = "tosa.mul"(%1129, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1131 = "tosa.add"(%1128, %1130) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1132 = "tosa.mul"(%1129, %1126) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1133 = "tosa.mul"(%1132, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1134 = "tosa.add"(%1131, %1133) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1135 = "tosa.mul"(%1132, %1126) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1136 = "tosa.mul"(%1135, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1137 = "tosa.add"(%1134, %1136) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1138 = "tosa.reciprocal"(%1137) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1139 = "tosa.mul"(%1138, %1138) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1140 = "tosa.mul"(%1139, %1139) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1141 = "tosa.sub"(%32, %1140) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1142 = "tosa.greater_equal"(%1125, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %1143 = "tosa.negate"(%1141) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1144 = "tosa.select"(%1142, %1141, %1143) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1145 = "tosa.add"(%1144, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1146 = "tosa.mul"(%1145, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1147 = "tosa.mul"(%1123, %1146) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1148 = "tosa.matmul"(%1147, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %1149 = "tosa.reshape"(%1148) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1150 = "tosa.add"(%67, %1149) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1151 = "tosa.reshape"(%1150) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1152 = "tosa.add"(%1151, %1119) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1153 = "tosa.reduce_sum"(%1152) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1154 = "tosa.mul"(%1153, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1155 = "tosa.sub"(%1152, %1154) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1156 = "tosa.mul"(%1155, %1155) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1157 = "tosa.reduce_sum"(%1156) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1158 = "tosa.mul"(%1157, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1159 = "tosa.add"(%1158, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1160 = "tosa.rsqrt"(%1159) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1161 = "tosa.mul"(%1155, %1160) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1162 = "tosa.mul"(%1161, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1163 = "tosa.add"(%1162, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1164 = "tosa.matmul"(%1163, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %1165 = "tosa.reshape"(%1164) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1166 = "tosa.add"(%67, %1165) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1167 = "tosa.reshape"(%1166) {new_shape = [1, 128, 12, -1]} : (tensor<128x768xf32>) -> tensor<1x128x12x64xf32> | |
| %1168 = "tosa.transpose"(%1167, %13) : (tensor<1x128x12x64xf32>, tensor<4xi64>) -> tensor<1x12x128x64xf32> | |
| %1169 = "tosa.reshape"(%1168) {new_shape = [-1, 128, 64]} : (tensor<1x12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %1170 = "tosa.transpose"(%1169, %14) : (tensor<12x128x64xf32>, tensor<3xi32>) -> tensor<12x64x128xf32> | |
| %1171 = "tosa.matmul"(%1169, %1170) : (tensor<12x128x64xf32>, tensor<12x64x128xf32>) -> tensor<12x128x128xf32> | |
| %1172 = "tosa.mul"(%1171, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1173 = "tosa.matmul"(%1169, %94) : (tensor<12x128x64xf32>, tensor<12x64x512xf32>) -> tensor<12x128x512xf32> | |
| %1174 = "tosa.reshape"(%1173) {new_shape = [1, 786432, 1]} : (tensor<12x128x512xf32>) -> tensor<1x786432x1xf32> | |
| %1175 = "tosa.gather"(%1174, %108) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %1176 = "tosa.reshape"(%1175) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %1177 = "tosa.mul"(%1176, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1178 = "tosa.add"(%1177, %34) : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1179 = "tosa.gather"(%1174, %125) : (tensor<1x786432x1xf32>, tensor<1x16384xi32>) -> tensor<1x16384x1xf32> | |
| %1180 = "tosa.reshape"(%1179) {new_shape = [12, 128, 128]} : (tensor<1x16384x1xf32>) -> tensor<12x128x128xf32> | |
| %1181 = "tosa.transpose"(%1180, %14) : (tensor<12x128x128xf32>, tensor<3xi32>) -> tensor<12x128x128xf32> | |
| %1182 = "tosa.mul"(%1181, %84) {shift = 0 : i32} : (tensor<12x128x128xf32>, tensor<1x1x1xf32>) -> tensor<12x128x128xf32> | |
| %1183 = "tosa.add"(%1178, %1182) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1184 = "tosa.add"(%1172, %1183) : (tensor<12x128x128xf32>, tensor<12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1185 = "tosa.reshape"(%1184) {new_shape = [-1, 12, 128, 128]} : (tensor<12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1186 = "tosa.select"(%135, %7, %1185) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1187 = "tosa.reduce_max"(%1186) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %1188 = "tosa.sub"(%1186, %1187) : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %1189 = "tosa.exp"(%1188) : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1190 = "tosa.reduce_sum"(%1189) {axis = 3 : i64} : (tensor<1x12x128x128xf32>) -> tensor<1x12x128x1xf32> | |
| %1191 = "tosa.reciprocal"(%1190) : (tensor<1x12x128x1xf32>) -> tensor<1x12x128x1xf32> | |
| %1192 = "tosa.mul"(%1189, %1191) {shift = 0 : i32} : (tensor<1x12x128x128xf32>, tensor<1x12x128x1xf32>) -> tensor<1x12x128x128xf32> | |
| %1193 = "tosa.select"(%135, %33, %1192) : (tensor<1x1x128x128xi1>, tensor<f32>, tensor<1x12x128x128xf32>) -> tensor<1x12x128x128xf32> | |
| %1194 = "tosa.reshape"(%1193) {new_shape = [-1, 128, 128]} : (tensor<1x12x128x128xf32>) -> tensor<12x128x128xf32> | |
| %1195 = "tosa.matmul"(%1194, %1169) : (tensor<12x128x128xf32>, tensor<12x128x64xf32>) -> tensor<12x128x64xf32> | |
| %1196 = "tosa.reshape"(%1195) {new_shape = [-1, 12, 128, 64]} : (tensor<12x128x64xf32>) -> tensor<1x12x128x64xf32> | |
| %1197 = "tosa.transpose"(%1196, %13) : (tensor<1x12x128x64xf32>, tensor<4xi64>) -> tensor<1x128x12x64xf32> | |
| %1198 = "tosa.reshape"(%1197) {new_shape = [1, 128, 768]} : (tensor<1x128x12x64xf32>) -> tensor<1x128x768xf32> | |
| %1199 = "tosa.matmul"(%1198, %74) : (tensor<1x128x768xf32>, tensor<1x768x768xf32>) -> tensor<1x128x768xf32> | |
| %1200 = "tosa.reshape"(%1199) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1201 = "tosa.add"(%67, %1200) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1202 = "tosa.reshape"(%1201) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1203 = "tosa.add"(%1202, %1163) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1204 = "tosa.reduce_sum"(%1203) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1205 = "tosa.mul"(%1204, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1206 = "tosa.sub"(%1203, %1205) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1207 = "tosa.mul"(%1206, %1206) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1208 = "tosa.reduce_sum"(%1207) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1209 = "tosa.mul"(%1208, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1210 = "tosa.add"(%1209, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1211 = "tosa.rsqrt"(%1210) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1212 = "tosa.mul"(%1206, %1211) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1213 = "tosa.mul"(%1212, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1214 = "tosa.add"(%1213, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1215 = "tosa.matmul"(%1214, %166) : (tensor<1x128x768xf32>, tensor<1x768x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1216 = "tosa.reshape"(%1215) {new_shape = [128, 3072]} : (tensor<1x128x3072xf32>) -> tensor<128x3072xf32> | |
| %1217 = "tosa.add"(%169, %1216) : (tensor<1x3072xf32>, tensor<128x3072xf32>) -> tensor<128x3072xf32> | |
| %1218 = "tosa.reshape"(%1217) {new_shape = [1, 128, 3072]} : (tensor<128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1219 = "tosa.sub"(%1218, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1220 = "tosa.mul"(%1219, %31) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1221 = "tosa.abs"(%1220) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1222 = "tosa.mul"(%1221, %30) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1223 = "tosa.add"(%1222, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1224 = "tosa.mul"(%1221, %1221) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1225 = "tosa.mul"(%1224, %29) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1226 = "tosa.add"(%1223, %1225) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1227 = "tosa.mul"(%1224, %1221) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1228 = "tosa.mul"(%1227, %28) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1229 = "tosa.add"(%1226, %1228) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1230 = "tosa.mul"(%1227, %1221) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1231 = "tosa.mul"(%1230, %27) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1232 = "tosa.add"(%1229, %1231) : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1233 = "tosa.reciprocal"(%1232) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1234 = "tosa.mul"(%1233, %1233) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1235 = "tosa.mul"(%1234, %1234) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1236 = "tosa.sub"(%32, %1235) : (tensor<1x1x1xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1237 = "tosa.greater_equal"(%1220, %34) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xi1> | |
| %1238 = "tosa.negate"(%1236) : (tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1239 = "tosa.select"(%1237, %1236, %1238) : (tensor<1x128x3072xi1>, tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1240 = "tosa.add"(%1239, %32) : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1241 = "tosa.mul"(%1240, %26) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x1x1xf32>) -> tensor<1x128x3072xf32> | |
| %1242 = "tosa.mul"(%1218, %1241) {shift = 0 : i32} : (tensor<1x128x3072xf32>, tensor<1x128x3072xf32>) -> tensor<1x128x3072xf32> | |
| %1243 = "tosa.matmul"(%1242, %197) : (tensor<1x128x3072xf32>, tensor<1x3072x768xf32>) -> tensor<1x128x768xf32> | |
| %1244 = "tosa.reshape"(%1243) {new_shape = [128, 768]} : (tensor<1x128x768xf32>) -> tensor<128x768xf32> | |
| %1245 = "tosa.add"(%67, %1244) : (tensor<1x768xf32>, tensor<128x768xf32>) -> tensor<128x768xf32> | |
| %1246 = "tosa.reshape"(%1245) {new_shape = [1, 128, 768]} : (tensor<128x768xf32>) -> tensor<1x128x768xf32> | |
| %1247 = "tosa.add"(%1246, %1214) : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1248 = "tosa.reduce_sum"(%1247) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1249 = "tosa.mul"(%1248, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1250 = "tosa.sub"(%1247, %1249) : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1251 = "tosa.mul"(%1250, %1250) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x768xf32>) -> tensor<1x128x768xf32> | |
| %1252 = "tosa.reduce_sum"(%1251) {axis = 2 : i64} : (tensor<1x128x768xf32>) -> tensor<1x128x1xf32> | |
| %1253 = "tosa.mul"(%1252, %46) {shift = 0 : i32} : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1254 = "tosa.add"(%1253, %38) : (tensor<1x128x1xf32>, tensor<1x1x1xf32>) -> tensor<1x128x1xf32> | |
| %1255 = "tosa.rsqrt"(%1254) : (tensor<1x128x1xf32>) -> tensor<1x128x1xf32> | |
| %1256 = "tosa.mul"(%1250, %1255) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x128x1xf32>) -> tensor<1x128x768xf32> | |
| %1257 = "tosa.mul"(%1256, %52) {shift = 0 : i32} : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1258 = "tosa.add"(%1257, %52) : (tensor<1x128x768xf32>, tensor<1x1x768xf32>) -> tensor<1x128x768xf32> | |
| %1259 = "tosa.slice"(%1258) {size = [1, 1, 768], start = [0, 0, 0]} : (tensor<1x128x768xf32>) -> tensor<1x1x768xf32> | |
| %1260 = "tosa.matmul"(%1259, %74) : (tensor<1x1x768xf32>, tensor<1x768x768xf32>) -> tensor<1x1x768xf32> | |
| %1261 = "tosa.reshape"(%1260) {new_shape = [1, 768]} : (tensor<1x1x768xf32>) -> tensor<1x768xf32> | |
| %1262 = "tosa.sub"(%1261, %25) : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1263 = "tosa.mul"(%1262, %24) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1264 = "tosa.abs"(%1263) : (tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1265 = "tosa.mul"(%1264, %23) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1266 = "tosa.add"(%1265, %36) : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1267 = "tosa.mul"(%1264, %1264) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1268 = "tosa.mul"(%1267, %22) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1269 = "tosa.add"(%1266, %1268) : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1270 = "tosa.mul"(%1267, %1264) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1271 = "tosa.mul"(%1270, %21) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1272 = "tosa.add"(%1269, %1271) : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1273 = "tosa.mul"(%1270, %1264) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1274 = "tosa.mul"(%1273, %20) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1275 = "tosa.add"(%1272, %1274) : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1276 = "tosa.reciprocal"(%1275) : (tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1277 = "tosa.mul"(%1276, %1276) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1278 = "tosa.mul"(%1277, %1277) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1279 = "tosa.sub"(%36, %1278) : (tensor<1x1xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1280 = "tosa.greater_equal"(%1263, %25) : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xi1> | |
| %1281 = "tosa.negate"(%1279) : (tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1282 = "tosa.select"(%1280, %1279, %1281) : (tensor<1x768xi1>, tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1283 = "tosa.add"(%1282, %36) : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1284 = "tosa.mul"(%1283, %19) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x1xf32>) -> tensor<1x768xf32> | |
| %1285 = "tosa.mul"(%1261, %1284) {shift = 0 : i32} : (tensor<1x768xf32>, tensor<1x768xf32>) -> tensor<1x768xf32> | |
| %1286 = "tosa.transpose"(%1, %12) : (tensor<2x768xf32>, tensor<2xi32>) -> tensor<768x2xf32> | |
| %1287 = "tosa.reshape"(%1285) {new_shape = [1, 1, 768]} : (tensor<1x768xf32>) -> tensor<1x1x768xf32> | |
| %1288 = "tosa.reshape"(%1286) {new_shape = [1, 768, 2]} : (tensor<768x2xf32>) -> tensor<1x768x2xf32> | |
| %1289 = "tosa.matmul"(%1287, %1288) : (tensor<1x1x768xf32>, tensor<1x768x2xf32>) -> tensor<1x1x2xf32> | |
| %1290 = "tosa.reshape"(%1289) {new_shape = [1, 2]} : (tensor<1x1x2xf32>) -> tensor<1x2xf32> | |
| return %1290 : tensor<1x2xf32> | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment