Last active
May 1, 2024 13:41
-
-
Save geohot/7c9f10f5770f058a1de6ef0598e4c9d8 to your computer and use it in GitHub Desktop.
Outputted llm.c from tinygrad
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h>
#include <stdbool.h>
#include <tgmath.h>
/* Two-argument maximum. Every argument use and the whole expansion are
 * parenthesized so that arguments containing operators (e.g. a ?: expression)
 * expand with the intended precedence. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#define max(x,y) (((x)>(y))?(x):(y))
/* Maps `half` to __fp16, a compiler-specific half-precision type.
 * NOTE(review): availability depends on compiler/target; confirm it is needed. */
#define half __fp16
/* Increments the integer stored at data0[0] by one, in place. */
void E_(int* data0) {
    data0[0] += 1;
}
/* Fills data0[0..63] with the ascending sequence 0, 1, ..., 63. */
void r_64_64(int* data0) {
    int i = 0;
    while (i < 64) {
        data0[i] = i; // the generated form was (i + 1 + (-1))
        i++;
    }
}
/* Builds a 64x64 boolean table: data0[row*64+col] is true exactly when
 * data1[row*1024+col] equals 0.0f. Only the first 64 columns of each
 * 1024-wide source row are read. */
void E_64_64(bool* data0, const float* data1) {
    for (int row = 0; row < 64; row++) {
        const float* src = data1 + (row * 1024);
        bool* dst = data0 + (row * 64);
        for (int col = 0; col < 64; col++) {
            dst[col] = (0.0f == src[col]);
        }
    }
}
/* Fills data0[0..50256] with the ascending sequence 0, 1, ..., 50256. */
void r_50257_50257(int* data0) {
    int i = 0;
    while (i < 50257) {
        data0[i] = i; // the generated form was (i + 1 + (-1))
        i++;
    }
}
/* Writes 1.0f - data1[0] into data0[0]. */
void E_n1(float* data0, const float* data1) {
    const float x = data1[0];
    data0[0] = (1.0f - x);
}
void E_n2(float* data0, const float* data1, const int* data2) { | |
float val0 = data1[0]; | |
int val1 = data2[0]; | |
float alu0 = max(val0,0.0f); | |
float alu1 = max((-val0),0.0f); | |
float alu2 = (alu0+alu1); | |
float cast0 = (float)(val1); | |
float alu3 = (val0/(alu2+1e-12f)); | |
float alu4 = max(alu3,0.0f); | |
float alu5 = max((-alu3),0.0f); | |
float alu6 = (((alu3-1.0f)*(-0.5f))-(1.5f*(1.0f-(alu4+alu5)))); | |
data0[0] = (1.0f-(exp2((log2(alu2)*0.6931471805599453f*cast0*1.4426950408889634f))*((sin((1.5707963267948966f-(cast0*3.141592653589793f)))*alu6)+(1.0f-alu6)))); | |
} | |
/* For each of 64 rows and 768 columns, sums data3[col + k*768] over the 1024
 * indices k for which data1[row] == data2[k], storing the sum in
 * data0[row*768 + col].
 * NOTE(review): presumably a table lookup expressed as a one-hot reduction. */
void r_64_768_1024(float* data0, const int* data1, const int* data2, const float* data3) {
    for (int row = 0; row < 64; row++) {
        const int key = data1[row];
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float sum = 0.0f;
            for (int k = 0; k < 1024; k++) {
                const int candidate = data2[k];
                const float entry = data3[col + (k * 768)];
                sum += ((float)((key == candidate)) * entry);
            }
            out_row[col] = sum;
        }
    }
}
/* For each of 256 entries, writes 1 when data1[i] differs from -1, else 0. */
void E_256(int* data0, const int* data1) {
    for (int i = 0; i < 256; i++) {
        data0[i] = (data1[i] != (-1)) ? 1 : 0;
    }
}
/* Counts how many of the 256 entries of data1 differ from -1 and stores the
 * count in data0[0]. */
void r_256(int* data0, const int* data1) {
    int count = 0;
    for (int i = 0; i < 256; i++) {
        count += (data1[i] != (-1)) ? 1 : 0;
    }
    data0[0] = count;
}
/* For each (b, t, d) with b < 4, t < 64, d < 768: sums data3[d + k*768] over
 * the 50257 indices k where data1[b*64 + t] == data2[k], adds
 * data4[t*768 + d], and stores the result in data0[b*49152 + t*768 + d].
 * NOTE(review): presumably a one-hot table lookup plus a per-position term. */
void r_4_64_768_50257(float* data0, const int* data1, const int* data2, const float* data3, const float* data4) {
    for (int b = 0; b < 4; b++) {
        for (int t = 0; t < 64; t++) {
            const int key = data1[(b * 64) + t];
            const int row_off = (t * 768);
            for (int d = 0; d < 768; d++) {
                float sum = 0.0f;
                const float addend = data4[row_off + d];
                for (int k = 0; k < 50257; k++) {
                    const int candidate = data2[k];
                    const float entry = data3[d + (k * 768)];
                    sum += ((float)((key == candidate)) * entry);
                }
                data0[(b * 49152) + row_off + d] = (sum + addend);
            }
        }
    }
}
/* Writes the reciprocal of data1[0] (converted to float) into data0[0]. */
void E_n3(float* data0, const int* data1) {
    const int denom = data1[0];
    data0[0] = (1.0f / (float)(denom));
}
/* For each of 256 rows, sums the 768 values data1[row*768 ..] and stores the
 * sum scaled by 0.0013020833333333333f in data0[row]. */
void r_256_768(float* data0, const float* data1) {
    for (int row = 0; row < 256; row++) {
        const float* src = data1 + (row * 768);
        float total = 0.0f;
        for (int col = 0; col < 768; col++) {
            total += src[col];
        }
        data0[row] = (total * 0.0013020833333333333f);
    }
}
/* For each of 256 rows: accumulates the squared difference between each of
 * the 768 values data1[row*768 ..] and data2[row], scales the sum by
 * 0.0013020833333333333f, adds 1e-05f, and stores the reciprocal in data0[row]. */
void r_256_768n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* src = data1 + (row * 768);
        const float center = data2[row];
        float sq_sum = 0.0f;
        for (int col = 0; col < 768; col++) {
            const float diff = (src[col] - center);
            sq_sum += (diff * diff);
        }
        data0[row] = (1.0f / ((sq_sum * 0.0013020833333333333f) + 1e-05f));
    }
}
void E_256n1(float* data0, const float* data1) { | |
for (int ridx0 = 0; ridx0 < 256; ridx0++) { | |
float val0 = data1[ridx0]; | |
data0[ridx0] = sqrt(val0); | |
} | |
} | |
/* Elementwise over a 256x768 grid:
 *   data0[r*768+c] = (data1[r*768+c] - data2[r]) * data3[r] * data4[c] + data5[c].
 * NOTE(review): resembles a normalize-then-scale-and-shift step; confirm. */
void E_256_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
    for (int row = 0; row < 256; row++) {
        const float shift = data2[row];
        const float scale = data3[row];
        const int base = (row * 768);
        for (int col = 0; col < 768; col++) {
            const float x = data1[base + col];
            const float w = data4[col];
            const float b = data5[col];
            data0[base + col] = (((x - shift) * scale * w) + b);
        }
    }
}
/* For each row < 256 and output column o < 2304: stores in data0[row*2304+o]
 * the dot product of data1[row*768 ..] with data2[o*768 ..] (768 terms) plus
 * data3[o]. */
void r_256_2304_768(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* x = data1 + (row * 768);
        for (int o = 0; o < 2304; o++) {
            const float* w = data2 + (o * 768);
            const float bias = data3[o];
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot += (x[k] * w[k]);
            }
            data0[(row * 2304) + o] = (dot + bias);
        }
    }
}
/* For b < 4, h < 12, q < 64, k < 64: computes the 64-term dot product between
 * data2[b*147456 + h*64 + q*2304 ..] and data2[b*147456 + h*64 + k*2304 + 768 ..],
 * scales it by 0.125f, and stores it at data0[b*49152 + h*4096 + q*64 + k].
 * Where data1[q*64 + k] is true the output is -INFINITY instead.
 * NOTE(review): looks like masked, scaled attention scores; confirm. */
void r_4_12_64_64_64(float* data0, const bool* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        for (int h = 0; h < 12; h++) {
            const int base = ((b * 147456) + (h * 64));
            for (int q = 0; q < 64; q++) {
                const int mask_row = (q * 64);
                for (int k = 0; k < 64; k++) {
                    float dot = 0.0f;
                    const bool masked = data1[mask_row + k];
                    for (int d = 0; d < 64; d++) {
                        const float lhs = data2[base + (q * 2304) + d];
                        const float rhs = data2[base + (k * 2304) + d + 768];
                        dot += (lhs * rhs);
                    }
                    data0[(b * 49152) + (h * 4096) + mask_row + k] = (masked ? (-INFINITY) : (dot * 0.125f));
                }
            }
        }
    }
}
/* For each of 3072 rows, stores in data0[row] the largest of the 64 values
 * data1[row*64 ..], starting the running maximum at -INFINITY. */
void r_3072_64(float* data0, const float* data1) {
    for (int row = 0; row < 3072; row++) {
        const float* src = data1 + (row * 64);
        float best = -INFINITY;
        for (int col = 0; col < 64; col++) {
            const float x = src[col];
            best = ((x > best) ? x : best);
        }
        data0[row] = best;
    }
}
void r_3072_64n1(float* data0, const float* data1, const float* data2) { | |
for (int ridx0 = 0; ridx0 < 3072; ridx0++) { | |
float acc0 = 0.0f; | |
float val0 = data2[ridx0]; | |
for (int ridx1 = 0; ridx1 < 64; ridx1++) { | |
float val1 = data1[(ridx0*64)+ridx1]; | |
acc0 = (exp2(((val1-val0)*1.4426950408889634f))+acc0); | |
} | |
data0[ridx0] = acc0; | |
} | |
} | |
/* For each of 3072 rows, counts (as a float) how many of the 64 values
 * data1[row*64 ..] compare equal to data2[row], storing the count in data0[row]. */
void r_3072_64n2(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 3072; row++) {
        const float* src = data1 + (row * 64);
        const float ref = data2[row];
        float matches = 0.0f;
        for (int col = 0; col < 64; col++) {
            matches += (float)((src[col] == ref));
        }
        data0[row] = matches;
    }
}
void E_3072_64(float* data0, const float* data1, const float* data2, const float* data3) { | |
for (int ridx0 = 0; ridx0 < 3072; ridx0++) { | |
float val0 = data2[ridx0]; | |
float val1 = data3[ridx0]; | |
for (int ridx1 = 0; ridx1 < 64; ridx1++) { | |
int alu0 = ((ridx0*64)+ridx1); | |
float val2 = data1[alu0]; | |
data0[alu0] = (exp2(((val2-val0)*1.4426950408889634f))/val1); | |
} | |
} | |
} | |
/* For b < 4, h < 12, q < 64, d < 64: stores in
 * data0[b*49152 + h*4096 + q*64 + d] the 64-term sum over k of
 *   data1[b*49152 + h*4096 + q*64 + k] * data2[b*147456 + h*64 + d + k*2304 + 1536].
 * NOTE(review): looks like attention weights applied to the third 768-wide
 * slice of a packed buffer; confirm. */
void r_4_12_64_64_64n1(float* data0, const float* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        for (int h = 0; h < 12; h++) {
            for (int q = 0; q < 64; q++) {
                const int row_off = ((b * 49152) + (h * 4096) + (q * 64));
                for (int d = 0; d < 64; d++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        const float weight = data1[row_off + k];
                        const float value = data2[(b * 147456) + (h * 64) + d + (k * 2304) + 1536];
                        dot += (weight * value);
                    }
                    data0[row_off + d] = dot;
                }
            }
        }
    }
}
/* For b < 4, t < 64, o < 768: computes a 768-term dot product between a
 * gathered view of data2 (index b*49152 + t*64 + (k/64)*4096 + k%64) and
 * data3[o*768 + k], then stores data1[idx] + dot + data4[o] at data0[idx],
 * where idx = b*49152 + t*768 + o. */
void r_4_64_768_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) {
    for (int b = 0; b < 4; b++) {
        const int batch_off = (b * 49152);
        for (int t = 0; t < 64; t++) {
            for (int o = 0; o < 768; o++) {
                float dot = 0.0f;
                const int idx = (batch_off + (t * 768) + o);
                const float residual = data1[idx];
                const float bias = data4[o];
                for (int k = 0; k < 768; k++) {
                    const float x = data2[batch_off + (t * 64) + ((k / 64) * 4096) + (k % 64)];
                    const float w = data3[(o * 768) + k];
                    dot += (x * w);
                }
                data0[idx] = (residual + dot + bias);
            }
        }
    }
}
/* For each row < 256 and output column o < 3072: stores in data0[row*3072+o]
 * the dot product of data1[row*768 ..] with data2[o*768 ..] (768 terms) plus
 * data3[o]. */
void r_256_3072_768(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* x = data1 + (row * 768);
        for (int o = 0; o < 3072; o++) {
            const float* w = data2 + (o * 768);
            const float bias = data3[o];
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot += (x[k] * w[k]);
            }
            data0[(row * 3072) + o] = (dot + bias);
        }
    }
}
void E_786432(float* data0, const float* data1) { | |
for (int ridx0 = 0; ridx0 < 786432; ridx0++) { | |
float val0 = data1[ridx0]; | |
data0[ridx0] = (0.5f*val0*(1.0f+((2.0f*(1.0f/(1.0f+exp2((2.0f*val0*0.7978845608f*(1.0f+(0.044715f*val0*val0))*(-1.4426950408889634f))))))-1.0f))); | |
} | |
} | |
/* For each row < 256 and column o < 768: computes the 3072-term dot product
 * of data2[row*3072 ..] with data3[o*3072 ..] and stores
 * data1[idx] + dot + data4[o] at data0[idx], where idx = row*768 + o. */
void r_256_768_3072(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) {
    for (int row = 0; row < 256; row++) {
        const float* x = data2 + (row * 3072);
        for (int o = 0; o < 768; o++) {
            float dot = 0.0f;
            const int idx = ((row * 768) + o);
            const float residual = data1[idx];
            const float bias = data4[o];
            const float* w = data3 + (o * 3072);
            for (int k = 0; k < 3072; k++) {
                dot += (x[k] * w[k]);
            }
            data0[idx] = (residual + dot + bias);
        }
    }
}
/* For each row < 256 and column o < 50257: stores in data0[row*50257 + o] the
 * 768-term dot product of data1[row*768 ..] with data2[o*768 ..]. */
void r_256_50257_768(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* x = data1 + (row * 768);
        for (int o = 0; o < 50257; o++) {
            const float* w = data2 + (o * 768);
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot += (x[k] * w[k]);
            }
            data0[(row * 50257) + o] = dot;
        }
    }
}
/* For each of 256 rows, stores in data0[row] the largest of the 50257 values
 * data1[row*50257 ..], starting the running maximum at -INFINITY. */
void r_256_50257(float* data0, const float* data1) {
    for (int row = 0; row < 256; row++) {
        const float* src = data1 + (row * 50257);
        float best = -INFINITY;
        for (int col = 0; col < 50257; col++) {
            const float x = src[col];
            best = ((x > best) ? x : best);
        }
        data0[row] = best;
    }
}
/* For each of 256 rows, counts (as a float) how many of the 50257 values
 * data1[row*50257 ..] compare equal to data2[row], storing the count in
 * data0[row]. */
void r_256_50257n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* src = data1 + (row * 50257);
        const float ref = data2[row];
        float matches = 0.0f;
        for (int col = 0; col < 50257; col++) {
            matches += (float)((src[col] == ref));
        }
        data0[row] = matches;
    }
}
/* For each of 256 rows, sums exp2((x - data2[row]) * 1.4426950408889634f)
 * over the 50257 values x = data1[row*50257 ..] and stores the sum in
 * data0[row].
 * NOTE(review): the constant is approximately log2(e), so this presumably
 * sums exp(x - data2[row]); confirm. */
void r_256_50257n2(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* src = data1 + (row * 50257);
        const float ref = data2[row];
        float total = 0.0f;
        for (int col = 0; col < 50257; col++) {
            const float x = src[col];
            total += exp2(((x - ref) * 1.4426950408889634f));
        }
        data0[row] = total;
    }
}
void E_256n2(float* data0, const float* data1) { | |
for (int ridx0 = 0; ridx0 < 256; ridx0++) { | |
float val0 = data1[ridx0]; | |
data0[ridx0] = (log2(val0)*0.6931471805599453f); | |
} | |
} | |
/* For each of 256 rows: accumulates -((float)(sel * data3[row]) * data4[0])
 * over 50257 columns, where sel is -1 when data1[row] == data2[col] and 0
 * otherwise, then stores the accumulated value divided by data5[row] in
 * data0[row]. */
void r_256_50257n3(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5) {
    const float scale = data4[0];
    for (int row = 0; row < 256; row++) {
        float total = 0.0f;
        const int target = data1[row];
        const int weight = data3[row];
        const float divisor = data5[row];
        for (int col = 0; col < 50257; col++) {
            const int sel = ((target == data2[col]) ? (-1) : 0);
            total += (-((float)((sel * weight)) * scale));
        }
        data0[row] = (total / divisor);
    }
}
/* Reduces a 256x50257 grid to one scalar. Each element contributes
 *   ((data1[r*50257+c] - data2[r]) - data3[r]) * (float)(sel * data6[r]),
 * where sel is -1 when data4[r] == data5[c] and 0 otherwise. The total is
 * divided by (float)data7[0]; the generated "+ 0.0f/denominator" term is kept.
 * NOTE(review): looks like a masked mean negative log-likelihood; confirm. */
void r_256_50257n4(float* data0, const float* data1, const float* data2, const float* data3, const int* data4, const int* data5, const int* data6, const int* data7) {
    float total = 0.0f;
    const int denom_int = data7[0];
    const float denom = (float)(denom_int);
    for (int row = 0; row < 256; row++) {
        const float sub_a = data2[row];
        const float sub_b = data3[row];
        const int target = data4[row];
        const int weight = data6[row];
        const float* src = data1 + (row * 50257);
        for (int col = 0; col < 50257; col++) {
            const float x = src[col];
            const int sel = ((target == data5[col]) ? (-1) : 0);
            total += (((x - sub_a) - sub_b) * (float)((sel * weight)));
        }
    }
    data0[0] = ((total / denom) + (0.0f / denom));
}
/* For each of 256 rows: accumulates over 50257 columns the negation of
 *   (float)(sel * data3[row]) * data4[0]
 *   + exp2((data5[row*50257+col] - data6[row]) * 1.4426950408889634f) * data7[row],
 * where sel is -1 when data1[row] == data2[col] and 0 otherwise, and stores
 * the result in data0[row]. */
void r_256_50257n5(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float scale = data4[0];
    for (int row = 0; row < 256; row++) {
        float total = 0.0f;
        const int target = data1[row];
        const int weight = data3[row];
        const float ref = data6[row];
        const float factor = data7[row];
        const float* src = data5 + (row * 50257);
        for (int col = 0; col < 50257; col++) {
            const int sel = ((target == data2[col]) ? (-1) : 0);
            const float x = src[col];
            const float e = exp2(((x - ref) * 1.4426950408889634f));
            total += (-(((float)((sel * weight)) * scale) + (e * factor)));
        }
        data0[row] = total;
    }
}
/* Elementwise over a 256x50257 grid; each output is the sum of three terms:
 *   (float)(sel * data3[r]) * data4[0], with sel = -1 when data1[r] == data2[c] else 0;
 *   exp2((x - data6[r]) * 1.4426950408889634f) * data7[r];
 *   ((float)(x == data6[r]) / data8[r]) * data9[r];
 * where x = data5[r*50257 + c]. */
void E_256_50257(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8, const float* data9) {
    const float scale = data4[0];
    for (int row = 0; row < 256; row++) {
        const int target = data1[row];
        const int weight = data3[row];
        const float ref = data6[row];
        const float exp_factor = data7[row];
        const float eq_div = data8[row];
        const float eq_factor = data9[row];
        for (int col = 0; col < 50257; col++) {
            const int idx = ((row * 50257) + col);
            const int sel = ((target == data2[col]) ? (-1) : 0);
            const float x = data5[idx];
            const float e = exp2(((x - ref) * 1.4426950408889634f));
            data0[idx] = (((float)((sel * weight)) * scale) + (e * exp_factor) + (((float)((x == ref)) / eq_div) * eq_factor));
        }
    }
}
/* For each row < 256 and column o < 768: stores in data0[row*768 + o] the
 * 50257-term sum over k of data1[o + k*768] * data2[row*50257 + k]. */
void r_256_768_50257(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* g = data2 + (row * 50257);
        for (int o = 0; o < 768; o++) {
            float dot = 0.0f;
            for (int k = 0; k < 50257; k++) {
                const float w = data1[o + (k * 768)];
                dot += (w * g[k]);
            }
            data0[(row * 768) + o] = dot;
        }
    }
}
/* For each of 768 columns i, reduces over 256 rows j:
 *   g = sum_j (data4[i + j*768] - data5[j]) * data6[j] * data7[i + j*768]
 * then updates two buffers in place:
 *   data0[i] = data2[0]*data0[i] + data3[0]*g
 *   data1[i] = data8[0]*data1[i] + data9[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8, const float* data9) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data8[0];
    const float mix1 = data9[0];
    for (int i = 0; i < 768; i++) {
        float g = 0.0f;
        const float old0 = data0[i];
        const float old1 = data1[i];
        for (int j = 0; j < 256; j++) {
            const int idx = (i + (j * 768));
            const float x = data4[idx];
            const float shift = data5[j];
            const float scale = data6[j];
            const float up = data7[idx];
            g += ((x - shift) * scale * up);
        }
        data0[i] = ((keep0 * old0) + (mix0 * g));
        data1[i] = ((keep1 * old1) + (mix1 * g * g));
    }
}
/* For each of 768 columns i, sums data4[i + j*768] over 256 rows j into g,
 * then updates two buffers in place:
 *   data0[i] = data2[0]*data0[i] + data3[0]*g
 *   data1[i] = data5[0]*data1[i] + data6[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_768_256n1(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data5[0];
    const float mix1 = data6[0];
    for (int i = 0; i < 768; i++) {
        float g = 0.0f;
        const float old0 = data0[i];
        const float old1 = data1[i];
        for (int j = 0; j < 256; j++) {
            g += data4[i + (j * 768)];
        }
        data0[i] = ((keep0 * old0) + (mix0 * g));
        data1[i] = ((keep1 * old1) + (mix1 * g * g));
    }
}
/* For each of 256 rows: accumulates
 *   (data1[idx] - data2[row]) * data3[col] * data4[idx]
 * over 768 columns (idx = row*768 + col), then stores
 *   (-(acc / (data5[row]*2))) * data6[row] * data6[row] * 0.0013020833333333333f
 * in data0[row]. */
void r_256_768n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    for (int row = 0; row < 256; row++) {
        float total = 0.0f;
        const float shift = data2[row];
        const float halved = data5[row];
        const float squared = data6[row];
        const int base = (row * 768);
        for (int col = 0; col < 768; col++) {
            const float x = data1[base + col];
            const float w = data3[col];
            const float up = data4[base + col];
            total += ((x - shift) * w * up);
        }
        data0[row] = ((-(total / (halved * 2.0f))) * squared * squared * 0.0013020833333333333f);
    }
}
void E_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 768; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/* For each of 256 rows: with t = (data4[idx] - data5[row]) * data6[row],
 * accumulates -((data1[row]*data2[col]*data3[idx]) + t + t) over 768 columns
 * (idx = row*768 + col) and stores the sum scaled by
 * 0.0013020833333333333f in data0[row]. */
void r_256_768n3(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    for (int row = 0; row < 256; row++) {
        float total = 0.0f;
        const float row_scale = data1[row];
        const float shift = data5[row];
        const float coeff = data6[row];
        const int base = (row * 768);
        for (int col = 0; col < 768; col++) {
            const float w = data2[col];
            const float up = data3[base + col];
            const float x = data4[base + col];
            const float t = ((x - shift) * coeff);
            total += (-((row_scale * w * up) + t + t));
        }
        data0[row] = (total * 0.0013020833333333333f);
    }
}
/* Elementwise over a 256x768 grid: with t = (data4[idx] - data5[row]) * data6[row],
 *   data0[idx] = (data1[row]*data2[col]*data3[idx]) + t + t + data7[row],
 * where idx = row*768 + col. */
void E_256_768n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    for (int row = 0; row < 256; row++) {
        const float row_scale = data1[row];
        const float shift = data5[row];
        const float coeff = data6[row];
        const float offset = data7[row];
        const int base = (row * 768);
        for (int col = 0; col < 768; col++) {
            const float w = data2[col];
            const float up = data3[base + col];
            const float x = data4[base + col];
            const float t = ((x - shift) * coeff);
            data0[base + col] = ((row_scale * w * up) + t + t + offset);
        }
    }
}
/* For each (i, j) with i < 768, j < 3072: reduces
 *   g = sum_{k<256} data4[j + k*3072] * data5[i + k*768]
 * and updates two buffers in place at idx = i*3072 + j:
 *   data0[idx] = data2[0]*data0[idx] + data3[0]*g
 *   data1[idx] = data6[0]*data1[idx] + data7[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_768_3072_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data6[0];
    const float mix1 = data7[0];
    for (int i = 0; i < 768; i++) {
        for (int j = 0; j < 3072; j++) {
            float g = 0.0f;
            const int idx = ((i * 3072) + j);
            const float old0 = data0[idx];
            const float old1 = data1[idx];
            for (int k = 0; k < 256; k++) {
                const float a = data4[j + (k * 3072)];
                const float b = data5[i + (k * 768)];
                g += (a * b);
            }
            data0[idx] = ((keep0 * old0) + (mix0 * g));
            data1[idx] = ((keep1 * old1) + (mix1 * g * g));
        }
    }
}
/* For each (row, j) with row < 256, j < 3072: computes the 768-term dot
 * product of data2[j + k*3072] with data3[row*768 + k], then combines it with
 * terms derived from x = data1[row*3072 + j] (a sigmoid of a cubic in x and
 * its products) and stores the result in data0[row*3072 + j].
 * NOTE(review): looks like the backward pass of the tanh-based GELU
 * approximation applied to an upstream matmul; confirm. */
void r_256_3072_768n1(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* up = data3 + (row * 768);
        for (int j = 0; j < 3072; j++) {
            const int idx = ((row * 3072) + j);
            // Upstream value: dot product over the 768-wide axis.
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                const float w = data2[j + (k * 3072)];
                dot += (w * up[k]);
            }
            const float x = data1[idx];
            const float cx = (0.044715f * x);
            const float sx = (x * 0.7978845608f);
            const float poly = (1.0f + (cx * x));
            const float sig = (1.0f / (1.0f + exp2((2.0f * sx * poly * (-1.4426950408889634f)))));
            const float inner = (2.0f * sig * (1.0f - sig) * 2.0f * 0.5f * x * dot);
            const float scaled = (sx * inner);
            data0[idx] = ((cx * scaled) + (0.044715f * x * scaled) + (0.7978845608f * poly * inner) + (0.5f * (1.0f + ((2.0f * sig) - 1.0f)) * dot));
        }
    }
}
/* For each (i, j) with i < 3072, j < 768: reduces
 *   g = sum_{k<256} data4[j + k*768] * data5[i + k*3072]
 * and updates two buffers in place at idx = i*768 + j:
 *   data0[idx] = data2[0]*data0[idx] + data3[0]*g
 *   data1[idx] = data6[0]*data1[idx] + data7[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_3072_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data6[0];
    const float mix1 = data7[0];
    for (int i = 0; i < 3072; i++) {
        for (int j = 0; j < 768; j++) {
            float g = 0.0f;
            const int idx = ((i * 768) + j);
            const float old0 = data0[idx];
            const float old1 = data1[idx];
            for (int k = 0; k < 256; k++) {
                const float a = data4[j + (k * 768)];
                const float b = data5[i + (k * 3072)];
                g += (a * b);
            }
            data0[idx] = ((keep0 * old0) + (mix0 * g));
            data1[idx] = ((keep1 * old1) + (mix1 * g * g));
        }
    }
}
/* For each of 3072 columns i, sums data4[i + j*3072] over 256 rows j into g,
 * then updates two buffers in place:
 *   data0[i] = data2[0]*data0[i] + data3[0]*g
 *   data1[i] = data5[0]*data1[i] + data6[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_3072_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data5[0];
    const float mix1 = data6[0];
    for (int i = 0; i < 3072; i++) {
        float g = 0.0f;
        const float old0 = data0[i];
        const float old1 = data1[i];
        for (int j = 0; j < 256; j++) {
            g += data4[i + (j * 3072)];
        }
        data0[i] = ((keep0 * old0) + (mix0 * g));
        data1[i] = ((keep1 * old1) + (mix1 * g * g));
    }
}
void E_2359296(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 2359296; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/* For each row < 256 and column o < 768: stores in data0[row*768 + o] the
 * 3072-term sum over k of data1[o + k*768] * data2[row*3072 + k]. */
void r_256_768_3072n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* g = data2 + (row * 3072);
        for (int o = 0; o < 768; o++) {
            float dot = 0.0f;
            for (int k = 0; k < 3072; k++) {
                const float w = data1[o + (k * 768)];
                dot += (w * g[k]);
            }
            data0[(row * 768) + o] = dot;
        }
    }
}
void E_3072(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 3072; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
void E_2359296n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 2359296; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/* Elementwise over a 256x768 grid: with t = (data5[idx] - data6[row]) * data7[row],
 *   data0[idx] = data1[idx] + (data2[row]*data3[col]*data4[idx]) + t + t + data8[row],
 * where idx = row*768 + col. */
void E_256_768n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8) {
    for (int row = 0; row < 256; row++) {
        const float row_scale = data2[row];
        const float shift = data6[row];
        const float coeff = data7[row];
        const float offset = data8[row];
        const int base = (row * 768);
        for (int col = 0; col < 768; col++) {
            const float carried = data1[base + col];
            const float w = data3[col];
            const float up = data4[base + col];
            const float x = data5[base + col];
            const float t = ((x - shift) * coeff);
            data0[base + col] = (carried + (row_scale * w * up) + t + t + offset);
        }
    }
}
/* For each (i, j) with i, j < 768: reduces over b < 4 and t < 64
 *   g = sum data4[b*49152 + t*64 + (j/64)*4096 + j%64] * data5[i + b*49152 + t*768]
 * and updates two buffers in place at idx = i*768 + j:
 *   data0[idx] = data2[0]*data0[idx] + data3[0]*g
 *   data1[idx] = data6[0]*data1[idx] + data7[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_768_768_4_64(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data6[0];
    const float mix1 = data7[0];
    for (int i = 0; i < 768; i++) {
        for (int j = 0; j < 768; j++) {
            float g = 0.0f;
            const int idx = ((i * 768) + j);
            const float old0 = data0[idx];
            const float old1 = data1[idx];
            for (int b = 0; b < 4; b++) {
                const int batch_off = (b * 49152);
                for (int t = 0; t < 64; t++) {
                    const float a = data4[batch_off + (t * 64) + ((j / 64) * 4096) + (j % 64)];
                    const float c = data5[i + batch_off + (t * 768)];
                    g += (a * c);
                }
            }
            data0[idx] = ((keep0 * old0) + (mix0 * g));
            data1[idx] = ((keep1 * old1) + (mix1 * g * g));
        }
    }
}
/* For each row < 256 and column o < 768: stores in data0[row*768 + o] the
 * 768-term sum over k of data1[o + k*768] * data2[row*768 + k]. */
void r_256_768_768(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const int base = (row * 768);
        for (int o = 0; o < 768; o++) {
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                const float w = data1[o + (k * 768)];
                const float g = data2[base + k];
                dot += (w * g);
            }
            data0[base + o] = dot;
        }
    }
}
void E_589824(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 589824; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/* For b < 4, h < 12, i < 64, j < 64: stores in
 * data0[b*49152 + h*4096 + i*64 + j] the 64-term sum over k of
 *   data1[b*49152 + h*4096 + j + k*64] * data2[b*49152 + h*64 + i + k*768]. */
void r_4_12_64_64_64n2(float* data0, const float* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        const int batch_off = (b * 49152);
        for (int h = 0; h < 12; h++) {
            const int head_off = (batch_off + (h * 4096));
            for (int i = 0; i < 64; i++) {
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        const float lhs = data1[head_off + j + (k * 64)];
                        const float rhs = data2[batch_off + (h * 64) + i + (k * 768)];
                        dot += (lhs * rhs);
                    }
                    data0[head_off + (i * 64) + j] = dot;
                }
            }
        }
    }
}
/* For b < 4, h < 12, i < 64, j < 64: stores in
 * data0[b*49152 + h*4096 + i*64 + j] the 64-term sum over k of
 *   data1[b*147456 + h*64 + j*2304 + k + 1536] * data2[b*49152 + h*64 + i*768 + k]. */
void r_4_12_64_64_64n3(float* data0, const float* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        const int batch_off = (b * 49152);
        for (int h = 0; h < 12; h++) {
            const int head_col = (h * 64);
            for (int i = 0; i < 64; i++) {
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        const float lhs = data1[(b * 147456) + head_col + (j * 2304) + k + 1536];
                        const float rhs = data2[batch_off + head_col + (i * 768) + k];
                        dot += (lhs * rhs);
                    }
                    data0[batch_off + (h * 4096) + (i * 64) + j] = dot;
                }
            }
        }
    }
}
void r_3072_64n3(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) { | |
for (int ridx0 = 0; ridx0 < 3072; ridx0++) { | |
float acc0 = 0.0f; | |
float val0 = data3[ridx0]; | |
float val1 = data4[ridx0]; | |
for (int ridx1 = 0; ridx1 < 64; ridx1++) { | |
int alu0 = ((ridx0*64)+ridx1); | |
float val2 = data1[alu0]; | |
float val3 = data2[alu0]; | |
acc0 = ((((-val2)*exp2(((val3-val0)*1.4426950408889634f)))/(val1*val1))+acc0); | |
} | |
data0[ridx0] = acc0; | |
} | |
} | |
void r_3072_64n4(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
for (int ridx0 = 0; ridx0 < 3072; ridx0++) { | |
float acc0 = 0.0f; | |
float val0 = data2[ridx0]; | |
float val1 = data4[ridx0]; | |
float val2 = data5[ridx0]; | |
for (int ridx1 = 0; ridx1 < 64; ridx1++) { | |
int alu0 = ((ridx0*64)+ridx1); | |
float val3 = data1[alu0]; | |
float val4 = data3[alu0]; | |
acc0 = ((-(exp2(((val3-val0)*1.4426950408889634f))*((val4/val1)+val2)))+acc0); | |
} | |
data0[ridx0] = acc0; | |
} | |
} | |
void E_48_64_64(float* data0, const bool* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8) { | |
for (int ridx0 = 0; ridx0 < 48; ridx0++) { | |
for (int ridx1 = 0; ridx1 < 64; ridx1++) { | |
int alu0 = (ridx1*64); | |
int alu1 = ((ridx0*64)+ridx1); | |
float val0 = data3[alu1]; | |
float val1 = data5[alu1]; | |
float val2 = data6[alu1]; | |
float val3 = data7[alu1]; | |
float val4 = data8[alu1]; | |
for (int ridx2 = 0; ridx2 < 64; ridx2++) { | |
int alu2 = ((ridx0*4096)+alu0+ridx2); | |
bool val5 = data1[alu0+ridx2]; | |
float val6 = data2[alu2]; | |
float val7 = data4[alu2]; | |
data0[alu2] = (0.125f*(val5?0.0f:((exp2(((val6-val0)*1.4426950408889634f))*((val7/val1)+val2))+(((float)((val6==val0))/val3)*val4)))); | |
} | |
} | |
} | |
} | |
/* For b < 4, h < 12, i < 64, j < 64: stores in
 * data0[b*49152 + h*4096 + i*64 + j] the 64-term sum over k of
 *   data1[b*147456 + h*64 + j + k*2304] * data2[b*49152 + h*4096 + i + k*64]. */
void r_4_12_64_64_64n4(float* data0, const float* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        for (int h = 0; h < 12; h++) {
            const int head_off = ((b * 49152) + (h * 4096));
            for (int i = 0; i < 64; i++) {
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        const float lhs = data1[(b * 147456) + (h * 64) + j + (k * 2304)];
                        const float rhs = data2[head_off + i + (k * 64)];
                        dot += (lhs * rhs);
                    }
                    data0[head_off + (i * 64) + j] = dot;
                }
            }
        }
    }
}
/* For b < 4, h < 12, i < 64, j < 64: stores in
 * data0[b*49152 + h*4096 + i*64 + j] the 64-term sum over k of
 *   data1[b*147456 + h*64 + j + k*2304 + 768] * data2[b*49152 + h*4096 + i*64 + k]. */
void r_4_12_64_64_64n5(float* data0, const float* data1, const float* data2) {
    for (int b = 0; b < 4; b++) {
        for (int h = 0; h < 12; h++) {
            for (int i = 0; i < 64; i++) {
                const int row_off = ((b * 49152) + (h * 4096) + (i * 64));
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        const float lhs = data1[(b * 147456) + (h * 64) + j + (k * 2304) + 768];
                        const float rhs = data2[row_off + k];
                        dot += (lhs * rhs);
                    }
                    data0[row_off + j] = dot;
                }
            }
        }
    }
}
/* Fills a 4x64x2304 output from three sources, chosen by the column c:
 *   c >= 1536       -> data1[b*49152 + t + c*64 - 98304]
 *   768 <= c < 1536 -> data2[...], indexed via (t + c/768 + 63)
 *   c < 768         -> data3[...], indexed via (t + c/768)
 * Each output is the sum of the three terms; the two that do not apply are
 * 0.0f. The three-way sum is kept so the result matches the generated code
 * exactly (including for negative zero). */
void E_4_64_2304(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int b = 0; b < 4; b++) {
        for (int t = 0; t < 64; t++) {
            for (int c = 0; c < 2304; c++) {
                const int shifted = (t + (c / 768));
                const int shifted63 = (shifted + 63);
                const int group_off = (((c / 64) % 12) * 4096);
                const int lane = (c % 64);
                float part1 = 0.0f;
                float part2 = 0.0f;
                float part3 = 0.0f;
                if (c > 1535) {
                    part1 = data1[(b * 49152) + t + (c * 64) + (-98304)];
                }
                if ((c > 767) && (c < 1536)) {
                    part2 = data2[(((b + (shifted63 / 64) + 3) % 4) * 49152) + ((shifted63 % 64) * 64) + group_off + lane];
                }
                if (c < 768) {
                    part3 = data3[(((b + (shifted / 64)) % 4) * 49152) + ((shifted % 64) * 64) + group_off + lane];
                }
                data0[(b * 147456) + (t * 2304) + c] = (part1 + part2 + part3);
            }
        }
    }
}
/* For each (i, j) with i < 2304, j < 768: reduces
 *   g = sum_{k<256} data4[j + k*768] * data5[i + k*2304]
 * and updates two buffers in place at idx = i*768 + j:
 *   data0[idx] = data2[0]*data0[idx] + data3[0]*g
 *   data1[idx] = data6[0]*data1[idx] + data7[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_2304_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data6[0];
    const float mix1 = data7[0];
    for (int i = 0; i < 2304; i++) {
        for (int j = 0; j < 768; j++) {
            float g = 0.0f;
            const int idx = ((i * 768) + j);
            const float old0 = data0[idx];
            const float old1 = data1[idx];
            for (int k = 0; k < 256; k++) {
                const float a = data4[j + (k * 768)];
                const float b = data5[i + (k * 2304)];
                g += (a * b);
            }
            data0[idx] = ((keep0 * old0) + (mix0 * g));
            data1[idx] = ((keep1 * old1) + (mix1 * g * g));
        }
    }
}
/* For each of 2304 columns i, sums data4[i + j*2304] over 256 rows j into g,
 * then updates two buffers in place:
 *   data0[i] = data2[0]*data0[i] + data3[0]*g
 *   data1[i] = data5[0]*data1[i] + data6[0]*g*g
 * NOTE(review): resembles running first/second-moment updates; confirm. */
void r2_2304_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep0 = data2[0];
    const float mix0 = data3[0];
    const float keep1 = data5[0];
    const float mix1 = data6[0];
    for (int i = 0; i < 2304; i++) {
        float g = 0.0f;
        const float old0 = data0[i];
        const float old1 = data1[i];
        for (int j = 0; j < 256; j++) {
            g += data4[i + (j * 2304)];
        }
        data0[i] = ((keep0 * old0) + (mix0 * g));
        data1[i] = ((keep1 * old1) + (mix1 * g * g));
    }
}
/*
 * Dense product written to data0, addressed as [256][768]:
 *
 *   data0[row*768 + col] =
 *       sum over k in [0,2304) of data1[col + k*768] * data2[row*2304 + k]
 *
 * data1 is read as 2304 rows of 768 floats, data2 as 256 rows of 2304
 * floats.  The sum is accumulated in single precision in increasing k.
 */
void r_256_768_2304(float* data0, const float* data1, const float* data2) {
  for (int row = 0; row < 256; row++) {
    const float* lhs = data2 + (row * 2304);
    for (int col = 0; col < 768; col++) {
      float acc = 0.0f;
      for (int k = 0; k < 2304; k++) {
        acc = ((data1[col + (k * 768)] * lhs[k]) + acc);
      }
      data0[(row * 768) + col] = acc;
    }
  }
}
void E_2304(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 2304; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
void E_1769472(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 1769472; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/*
 * Sums four consecutive 49152-float slices of data1 element-wise:
 *
 *   data0[i] = data1[i] + data1[i + 49152] + data1[i + 98304] + data1[i + 147456]
 *
 * The terms are added in that order, starting from 0.0f, in single
 * precision.  data0 receives 49152 floats; data1 holds 196608.
 */
void r_49152_4(float* data0, const float* data1) {
  for (int i = 0; i < 49152; i++) {
    float acc = 0.0f;
    for (int slice = 0; slice < 4; slice++) {
      acc = (data1[i + (slice * 49152)] + acc);
    }
    data0[i] = acc;
  }
}
/*
 * Masked sum written to data0, addressed as [50257][768]:
 *
 *   data0[row*768 + col] =
 *       sum over k in [0,256) of (data1[k] == data2[row]) * data3[col + k*768]
 *
 * data1 holds 256 ints, data2 holds 50257 ints, data3 holds 256*768 floats.
 * The comparison result (0 or 1) is multiplied in rather than branched on,
 * so every data3 element takes part in every sum; a non-finite data3 value
 * therefore propagates even where the mask is 0.  That behaviour is kept
 * as generated.
 * NOTE(review): presumably a one-hot scatter of per-position rows into a
 * 50257-row table (e.g. an embedding gradient) - confirm against the caller.
 */
void r_50257_768_256(float* data0, const int* data1, const int* data2, const float* data3) {
  for (int row = 0; row < 50257; row++) {
    const int wanted = data2[row];
    for (int col = 0; col < 768; col++) {
      float acc = 0.0f;
      for (int k = 0; k < 256; k++) {
        acc = (((float)((data1[k] == wanted)) * data3[col + (k * 768)]) + acc);
      }
      data0[(row * 768) + col] = acc;
    }
  }
}
/*
 * For every (row, col) in a 1024 x 768 grid:
 *
 *   acc = sum over k in [0,64) of (data4[k] == data5[row]) * data6[col + k*768]
 *   data0[row*768 + col] = data2[0]*data0[...] + data3[0]*acc
 *   data1[row*768 + col] = data7[0]*data1[...] + data8[0]*acc*acc
 *
 * data0 and data1 are updated in place.  data2, data3, data7 and data8 are
 * single-element scalars; data4 holds 64 ints, data5 holds 1024 ints and
 * data6 holds 64*768 floats.  The comparison result (0 or 1) is multiplied
 * in rather than branched on, as in the generated code.
 * NOTE(review): looks like a moving-average update driven by a one-hot
 * scatter of data6 rows - confirm against the caller.
 */
void r2_1024_768_64(float* data0, float* data1, const float* data2, const float* data3, const int* data4, const int* data5, const float* data6, const float* data7, const float* data8) {
  const float keep0 = data2[0];
  const float gain0 = data3[0];
  const float keep1 = data7[0];
  const float gain1 = data8[0];
  for (int row = 0; row < 1024; row++) {
    const int wanted = data5[row];
    for (int col = 0; col < 768; col++) {
      const int idx = (row * 768) + col;
      /* Both previous values are read before either output is written. */
      const float old0 = data0[idx];
      const float old1 = data1[idx];
      float acc = 0.0f;
      for (int k = 0; k < 64; k++) {
        acc = (((float)((data4[k] == wanted)) * data6[col + (k * 768)]) + acc);
      }
      data0[idx] = ((keep0 * old0) + (gain0 * acc));
      data1[idx] = ((keep1 * old1) + (gain1 * acc * acc));
    }
  }
}
/*
 * Dense product plus an addend, written to data0, addressed as [50257][768]:
 *
 *   data0[row*768 + col] =
 *       (sum over k in [0,256) of data1[col + k*768] * data2[row + k*50257])
 *       + data3[row*768 + col]
 *
 * data1 is read as 256 rows of 768 floats, data2 as 256 rows of 50257
 * floats, data3 as 50257*768 floats.  The addend is loaded before the
 * reduction and added after it, in single precision.
 */
void r_50257_768_256n1(float* data0, const float* data1, const float* data2, const float* data3) {
  for (int row = 0; row < 50257; row++) {
    for (int col = 0; col < 768; col++) {
      const int idx = (row * 768) + col;
      const float addend = data3[idx];
      float acc = 0.0f;
      for (int k = 0; k < 256; k++) {
        acc = ((data1[col + (k * 768)] * data2[row + (k * 50257)]) + acc);
      }
      data0[idx] = (acc + addend);
    }
  }
}
void E_786432n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 786432; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
/*
 * In-place element-wise blend over 38597376 floats:
 *
 *   data0[i] = data1[0]*data0[i] + data2[0]*data3[i]
 *
 * data1 and data2 are single-element scalars; data3 holds 38597376 floats.
 * NOTE(review): has the form of an exponential moving average of data3
 * (e.g. an optimizer first moment) - confirm against the caller.
 */
void E_38597376(float* data0, const float* data1, const float* data2, const float* data3) {
  const float keep = data1[0];
  const float gain = data2[0];
  for (int i = 0; i < 38597376; i++) {
    data0[i] = ((keep * data0[i]) + (gain * data3[i]));
  }
}
/*
 * In-place element-wise blend with a squared input over 38597376 floats:
 *
 *   data0[i] = data1[0]*data0[i] + (data2[0]*data3[i])*data3[i]
 *
 * data1 and data2 are single-element scalars; data3 holds 38597376 floats.
 * The product is evaluated left to right, as in the generated code.
 * NOTE(review): has the form of an exponential moving average of data3
 * squared (e.g. an optimizer second moment) - confirm against the caller.
 */
void E_38597376n1(float* data0, const float* data1, const float* data2, const float* data3) {
  const float keep = data1[0];
  const float gain = data2[0];
  for (int i = 0; i < 38597376; i++) {
    const float sample = data3[i];
    data0[i] = ((keep * data0[i]) + (gain * sample * sample));
  }
}
void E_38597376n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) { | |
float val0 = data1[0]; | |
float val1 = data3[0]; | |
float val2 = data5[0]; | |
for (int ridx0 = 0; ridx0 < 38597376; ridx0++) { | |
float val3 = data0[ridx0]; | |
float val4 = data2[ridx0]; | |
float val5 = data4[ridx0]; | |
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f)))); | |
} | |
} | |
int main() { | |
int* adam_t = (int*)malloc(4); | |
int* X = (int*)malloc(1024); | |
int* b2 = (int*)malloc(256); | |
bool* b3 = (bool*)malloc(4096); | |
float* h_0_attn_bias = (float*)malloc(4194304); | |
bool* b5 = (bool*)malloc(4096); | |
float* h_1_attn_bias = (float*)malloc(4194304); | |
bool* b7 = (bool*)malloc(4096); | |
float* h_2_attn_bias = (float*)malloc(4194304); | |
bool* b9 = (bool*)malloc(4096); | |
float* h_3_attn_bias = (float*)malloc(4194304); | |
bool* b11 = (bool*)malloc(4096); | |
float* h_4_attn_bias = (float*)malloc(4194304); | |
bool* b13 = (bool*)malloc(4096); | |
float* h_5_attn_bias = (float*)malloc(4194304); | |
bool* b15 = (bool*)malloc(4096); | |
float* h_6_attn_bias = (float*)malloc(4194304); | |
bool* b17 = (bool*)malloc(4096); | |
float* h_7_attn_bias = (float*)malloc(4194304); | |
bool* b19 = (bool*)malloc(4096); | |
float* h_8_attn_bias = (float*)malloc(4194304); | |
bool* b21 = (bool*)malloc(4096); | |
float* h_9_attn_bias = (float*)malloc(4194304); | |
bool* b23 = (bool*)malloc(4096); | |
float* h_10_attn_bias = (float*)malloc(4194304); | |
bool* b25 = (bool*)malloc(4096); | |
float* h_11_attn_bias = (float*)malloc(4194304); | |
int* Y = (int*)malloc(1024); | |
int* b28 = (int*)malloc(201028); | |
float* b29 = (float*)malloc(4); | |
float* adam_b1 = (float*)malloc(4); | |
float* b31 = (float*)malloc(4); | |
float* adam_b2 = (float*)malloc(4); | |
float* b33 = (float*)malloc(4); | |
float* b34 = (float*)malloc(4); | |
float* b35 = (float*)malloc(196608); | |
int* wpe_arange = (int*)malloc(4096); | |
float* wpe_weight = (float*)malloc(3145728); | |
int* b38 = (int*)malloc(1024); | |
int* b39 = (int*)malloc(4); | |
float* b40 = (float*)malloc(786432); | |
int* wte_arange = (int*)malloc(201028); | |
float* lm_head_weight = (float*)malloc(154389504); | |
float* b43 = (float*)malloc(4); | |
float* b44 = (float*)malloc(1024); | |
float* b45 = (float*)malloc(1024); | |
float* b46 = (float*)malloc(1024); | |
float* b47 = (float*)malloc(786432); | |
float* h_0_ln_1_weight = (float*)malloc(3072); | |
float* h_0_ln_1_bias = (float*)malloc(3072); | |
float* b50 = (float*)malloc(2359296); | |
float* h_0_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_0_attn_c_attn_bias = (float*)malloc(9216); | |
float* b53 = (float*)malloc(786432); | |
float* b54 = (float*)malloc(12288); | |
float* b55 = (float*)malloc(12288); | |
float* b56 = (float*)malloc(12288); | |
float* b57 = (float*)malloc(786432); | |
float* b58 = (float*)malloc(786432); | |
float* b59 = (float*)malloc(786432); | |
float* h_0_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_0_attn_c_proj_bias = (float*)malloc(3072); | |
float* b62 = (float*)malloc(1024); | |
float* b63 = (float*)malloc(1024); | |
float* b64 = (float*)malloc(1024); | |
float* b65 = (float*)malloc(786432); | |
float* h_0_ln_2_weight = (float*)malloc(3072); | |
float* h_0_ln_2_bias = (float*)malloc(3072); | |
float* b68 = (float*)malloc(3145728); | |
float* h_0_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_0_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b71 = (float*)malloc(3145728); | |
float* b72 = (float*)malloc(786432); | |
float* h_0_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_0_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b75 = (float*)malloc(1024); | |
float* b76 = (float*)malloc(1024); | |
float* b77 = (float*)malloc(1024); | |
float* b78 = (float*)malloc(786432); | |
float* h_1_ln_1_weight = (float*)malloc(3072); | |
float* h_1_ln_1_bias = (float*)malloc(3072); | |
float* b81 = (float*)malloc(2359296); | |
float* h_1_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_1_attn_c_attn_bias = (float*)malloc(9216); | |
float* b84 = (float*)malloc(786432); | |
float* b85 = (float*)malloc(12288); | |
float* b86 = (float*)malloc(12288); | |
float* b87 = (float*)malloc(12288); | |
float* b88 = (float*)malloc(786432); | |
float* b89 = (float*)malloc(786432); | |
float* b90 = (float*)malloc(786432); | |
float* h_1_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_1_attn_c_proj_bias = (float*)malloc(3072); | |
float* b93 = (float*)malloc(1024); | |
float* b94 = (float*)malloc(1024); | |
float* b95 = (float*)malloc(1024); | |
float* b96 = (float*)malloc(786432); | |
float* h_1_ln_2_weight = (float*)malloc(3072); | |
float* h_1_ln_2_bias = (float*)malloc(3072); | |
float* b99 = (float*)malloc(3145728); | |
float* h_1_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_1_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b102 = (float*)malloc(3145728); | |
float* b103 = (float*)malloc(786432); | |
float* h_1_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_1_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b106 = (float*)malloc(1024); | |
float* b107 = (float*)malloc(1024); | |
float* b108 = (float*)malloc(1024); | |
float* b109 = (float*)malloc(786432); | |
float* h_2_ln_1_weight = (float*)malloc(3072); | |
float* h_2_ln_1_bias = (float*)malloc(3072); | |
float* b112 = (float*)malloc(2359296); | |
float* h_2_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_2_attn_c_attn_bias = (float*)malloc(9216); | |
float* b115 = (float*)malloc(786432); | |
float* b116 = (float*)malloc(12288); | |
float* b117 = (float*)malloc(12288); | |
float* b118 = (float*)malloc(12288); | |
float* b119 = (float*)malloc(786432); | |
float* b120 = (float*)malloc(786432); | |
float* b121 = (float*)malloc(786432); | |
float* h_2_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_2_attn_c_proj_bias = (float*)malloc(3072); | |
float* b124 = (float*)malloc(1024); | |
float* b125 = (float*)malloc(1024); | |
float* b126 = (float*)malloc(1024); | |
float* b127 = (float*)malloc(786432); | |
float* h_2_ln_2_weight = (float*)malloc(3072); | |
float* h_2_ln_2_bias = (float*)malloc(3072); | |
float* b130 = (float*)malloc(3145728); | |
float* h_2_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_2_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b133 = (float*)malloc(3145728); | |
float* b134 = (float*)malloc(786432); | |
float* h_2_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_2_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b137 = (float*)malloc(1024); | |
float* b138 = (float*)malloc(1024); | |
float* b139 = (float*)malloc(1024); | |
float* b140 = (float*)malloc(786432); | |
float* h_3_ln_1_weight = (float*)malloc(3072); | |
float* h_3_ln_1_bias = (float*)malloc(3072); | |
float* b143 = (float*)malloc(2359296); | |
float* h_3_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_3_attn_c_attn_bias = (float*)malloc(9216); | |
float* b146 = (float*)malloc(786432); | |
float* b147 = (float*)malloc(12288); | |
float* b148 = (float*)malloc(12288); | |
float* b149 = (float*)malloc(12288); | |
float* b150 = (float*)malloc(786432); | |
float* b151 = (float*)malloc(786432); | |
float* b152 = (float*)malloc(786432); | |
float* h_3_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_3_attn_c_proj_bias = (float*)malloc(3072); | |
float* b155 = (float*)malloc(1024); | |
float* b156 = (float*)malloc(1024); | |
float* b157 = (float*)malloc(1024); | |
float* b158 = (float*)malloc(786432); | |
float* h_3_ln_2_weight = (float*)malloc(3072); | |
float* h_3_ln_2_bias = (float*)malloc(3072); | |
float* b161 = (float*)malloc(3145728); | |
float* h_3_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_3_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b164 = (float*)malloc(3145728); | |
float* b165 = (float*)malloc(786432); | |
float* h_3_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_3_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b168 = (float*)malloc(1024); | |
float* b169 = (float*)malloc(1024); | |
float* b170 = (float*)malloc(1024); | |
float* b171 = (float*)malloc(786432); | |
float* h_4_ln_1_weight = (float*)malloc(3072); | |
float* h_4_ln_1_bias = (float*)malloc(3072); | |
float* b174 = (float*)malloc(2359296); | |
float* h_4_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_4_attn_c_attn_bias = (float*)malloc(9216); | |
float* b177 = (float*)malloc(786432); | |
float* b178 = (float*)malloc(12288); | |
float* b179 = (float*)malloc(12288); | |
float* b180 = (float*)malloc(12288); | |
float* b181 = (float*)malloc(786432); | |
float* b182 = (float*)malloc(786432); | |
float* b183 = (float*)malloc(786432); | |
float* h_4_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_4_attn_c_proj_bias = (float*)malloc(3072); | |
float* b186 = (float*)malloc(1024); | |
float* b187 = (float*)malloc(1024); | |
float* b188 = (float*)malloc(1024); | |
float* b189 = (float*)malloc(786432); | |
float* h_4_ln_2_weight = (float*)malloc(3072); | |
float* h_4_ln_2_bias = (float*)malloc(3072); | |
float* b192 = (float*)malloc(3145728); | |
float* h_4_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_4_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b195 = (float*)malloc(3145728); | |
float* b196 = (float*)malloc(786432); | |
float* h_4_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_4_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b199 = (float*)malloc(1024); | |
float* b200 = (float*)malloc(1024); | |
float* b201 = (float*)malloc(1024); | |
float* b202 = (float*)malloc(786432); | |
float* h_5_ln_1_weight = (float*)malloc(3072); | |
float* h_5_ln_1_bias = (float*)malloc(3072); | |
float* b205 = (float*)malloc(2359296); | |
float* h_5_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_5_attn_c_attn_bias = (float*)malloc(9216); | |
float* b208 = (float*)malloc(786432); | |
float* b209 = (float*)malloc(12288); | |
float* b210 = (float*)malloc(12288); | |
float* b211 = (float*)malloc(12288); | |
float* b212 = (float*)malloc(786432); | |
float* b213 = (float*)malloc(786432); | |
float* b214 = (float*)malloc(786432); | |
float* h_5_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_5_attn_c_proj_bias = (float*)malloc(3072); | |
float* b217 = (float*)malloc(1024); | |
float* b218 = (float*)malloc(1024); | |
float* b219 = (float*)malloc(1024); | |
float* b220 = (float*)malloc(786432); | |
float* h_5_ln_2_weight = (float*)malloc(3072); | |
float* h_5_ln_2_bias = (float*)malloc(3072); | |
float* b223 = (float*)malloc(3145728); | |
float* h_5_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_5_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b226 = (float*)malloc(3145728); | |
float* b227 = (float*)malloc(786432); | |
float* h_5_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_5_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b230 = (float*)malloc(1024); | |
float* b231 = (float*)malloc(1024); | |
float* b232 = (float*)malloc(1024); | |
float* b233 = (float*)malloc(786432); | |
float* h_6_ln_1_weight = (float*)malloc(3072); | |
float* h_6_ln_1_bias = (float*)malloc(3072); | |
float* b236 = (float*)malloc(2359296); | |
float* h_6_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_6_attn_c_attn_bias = (float*)malloc(9216); | |
float* b239 = (float*)malloc(786432); | |
float* b240 = (float*)malloc(12288); | |
float* b241 = (float*)malloc(12288); | |
float* b242 = (float*)malloc(12288); | |
float* b243 = (float*)malloc(786432); | |
float* b244 = (float*)malloc(786432); | |
float* b245 = (float*)malloc(786432); | |
float* h_6_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_6_attn_c_proj_bias = (float*)malloc(3072); | |
float* b248 = (float*)malloc(1024); | |
float* b249 = (float*)malloc(1024); | |
float* b250 = (float*)malloc(1024); | |
float* b251 = (float*)malloc(786432); | |
float* h_6_ln_2_weight = (float*)malloc(3072); | |
float* h_6_ln_2_bias = (float*)malloc(3072); | |
float* b254 = (float*)malloc(3145728); | |
float* h_6_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_6_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b257 = (float*)malloc(3145728); | |
float* b258 = (float*)malloc(786432); | |
float* h_6_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_6_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b261 = (float*)malloc(1024); | |
float* b262 = (float*)malloc(1024); | |
float* b263 = (float*)malloc(1024); | |
float* b264 = (float*)malloc(786432); | |
float* h_7_ln_1_weight = (float*)malloc(3072); | |
float* h_7_ln_1_bias = (float*)malloc(3072); | |
float* b267 = (float*)malloc(2359296); | |
float* h_7_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_7_attn_c_attn_bias = (float*)malloc(9216); | |
float* b270 = (float*)malloc(786432); | |
float* b271 = (float*)malloc(12288); | |
float* b272 = (float*)malloc(12288); | |
float* b273 = (float*)malloc(12288); | |
float* b274 = (float*)malloc(786432); | |
float* b275 = (float*)malloc(786432); | |
float* b276 = (float*)malloc(786432); | |
float* h_7_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_7_attn_c_proj_bias = (float*)malloc(3072); | |
float* b279 = (float*)malloc(1024); | |
float* b280 = (float*)malloc(1024); | |
float* b281 = (float*)malloc(1024); | |
float* b282 = (float*)malloc(786432); | |
float* h_7_ln_2_weight = (float*)malloc(3072); | |
float* h_7_ln_2_bias = (float*)malloc(3072); | |
float* b285 = (float*)malloc(3145728); | |
float* h_7_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_7_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b288 = (float*)malloc(3145728); | |
float* b289 = (float*)malloc(786432); | |
float* h_7_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_7_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b292 = (float*)malloc(1024); | |
float* b293 = (float*)malloc(1024); | |
float* b294 = (float*)malloc(1024); | |
float* b295 = (float*)malloc(786432); | |
float* h_8_ln_1_weight = (float*)malloc(3072); | |
float* h_8_ln_1_bias = (float*)malloc(3072); | |
float* b298 = (float*)malloc(2359296); | |
float* h_8_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_8_attn_c_attn_bias = (float*)malloc(9216); | |
float* b301 = (float*)malloc(786432); | |
float* b302 = (float*)malloc(12288); | |
float* b303 = (float*)malloc(12288); | |
float* b304 = (float*)malloc(12288); | |
float* b305 = (float*)malloc(786432); | |
float* b306 = (float*)malloc(786432); | |
float* b307 = (float*)malloc(786432); | |
float* h_8_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_8_attn_c_proj_bias = (float*)malloc(3072); | |
float* b310 = (float*)malloc(1024); | |
float* b311 = (float*)malloc(1024); | |
float* b312 = (float*)malloc(1024); | |
float* b313 = (float*)malloc(786432); | |
float* h_8_ln_2_weight = (float*)malloc(3072); | |
float* h_8_ln_2_bias = (float*)malloc(3072); | |
float* b316 = (float*)malloc(3145728); | |
float* h_8_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_8_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b319 = (float*)malloc(3145728); | |
float* b320 = (float*)malloc(786432); | |
float* h_8_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_8_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b323 = (float*)malloc(1024); | |
float* b324 = (float*)malloc(1024); | |
float* b325 = (float*)malloc(1024); | |
float* b326 = (float*)malloc(786432); | |
float* h_9_ln_1_weight = (float*)malloc(3072); | |
float* h_9_ln_1_bias = (float*)malloc(3072); | |
float* b329 = (float*)malloc(2359296); | |
float* h_9_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_9_attn_c_attn_bias = (float*)malloc(9216); | |
float* b332 = (float*)malloc(786432); | |
float* b333 = (float*)malloc(12288); | |
float* b334 = (float*)malloc(12288); | |
float* b335 = (float*)malloc(12288); | |
float* b336 = (float*)malloc(786432); | |
float* b337 = (float*)malloc(786432); | |
float* b338 = (float*)malloc(786432); | |
float* h_9_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_9_attn_c_proj_bias = (float*)malloc(3072); | |
float* b341 = (float*)malloc(1024); | |
float* b342 = (float*)malloc(1024); | |
float* b343 = (float*)malloc(1024); | |
float* b344 = (float*)malloc(786432); | |
float* h_9_ln_2_weight = (float*)malloc(3072); | |
float* h_9_ln_2_bias = (float*)malloc(3072); | |
float* b347 = (float*)malloc(3145728); | |
float* h_9_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_9_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b350 = (float*)malloc(3145728); | |
float* b351 = (float*)malloc(786432); | |
float* h_9_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_9_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b354 = (float*)malloc(1024); | |
float* b355 = (float*)malloc(1024); | |
float* b356 = (float*)malloc(1024); | |
float* b357 = (float*)malloc(786432); | |
float* h_10_ln_1_weight = (float*)malloc(3072); | |
float* h_10_ln_1_bias = (float*)malloc(3072); | |
float* b360 = (float*)malloc(2359296); | |
float* h_10_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_10_attn_c_attn_bias = (float*)malloc(9216); | |
float* b363 = (float*)malloc(786432); | |
float* b364 = (float*)malloc(12288); | |
float* b365 = (float*)malloc(12288); | |
float* b366 = (float*)malloc(12288); | |
float* b367 = (float*)malloc(786432); | |
float* b368 = (float*)malloc(786432); | |
float* b369 = (float*)malloc(786432); | |
float* h_10_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_10_attn_c_proj_bias = (float*)malloc(3072); | |
float* b372 = (float*)malloc(1024); | |
float* b373 = (float*)malloc(1024); | |
float* b374 = (float*)malloc(1024); | |
float* b375 = (float*)malloc(786432); | |
float* h_10_ln_2_weight = (float*)malloc(3072); | |
float* h_10_ln_2_bias = (float*)malloc(3072); | |
float* b378 = (float*)malloc(3145728); | |
float* h_10_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_10_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b381 = (float*)malloc(3145728); | |
float* b382 = (float*)malloc(786432); | |
float* h_10_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_10_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b385 = (float*)malloc(1024); | |
float* b386 = (float*)malloc(1024); | |
float* b387 = (float*)malloc(1024); | |
float* b388 = (float*)malloc(786432); | |
float* h_11_ln_1_weight = (float*)malloc(3072); | |
float* h_11_ln_1_bias = (float*)malloc(3072); | |
float* b391 = (float*)malloc(2359296); | |
float* h_11_attn_c_attn_weight = (float*)malloc(7077888); | |
float* h_11_attn_c_attn_bias = (float*)malloc(9216); | |
float* b394 = (float*)malloc(786432); | |
float* b395 = (float*)malloc(12288); | |
float* b396 = (float*)malloc(12288); | |
float* b397 = (float*)malloc(12288); | |
float* b398 = (float*)malloc(786432); | |
float* b399 = (float*)malloc(786432); | |
float* b400 = (float*)malloc(786432); | |
float* h_11_attn_c_proj_weight = (float*)malloc(2359296); | |
float* h_11_attn_c_proj_bias = (float*)malloc(3072); | |
float* b403 = (float*)malloc(1024); | |
float* b404 = (float*)malloc(1024); | |
float* b405 = (float*)malloc(1024); | |
float* b406 = (float*)malloc(786432); | |
float* h_11_ln_2_weight = (float*)malloc(3072); | |
float* h_11_ln_2_bias = (float*)malloc(3072); | |
float* b409 = (float*)malloc(3145728); | |
float* h_11_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* h_11_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b412 = (float*)malloc(3145728); | |
float* b413 = (float*)malloc(786432); | |
float* h_11_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* h_11_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b416 = (float*)malloc(1024); | |
float* b417 = (float*)malloc(1024); | |
float* b418 = (float*)malloc(1024); | |
float* b419 = (float*)malloc(786432); | |
float* ln_f_weight = (float*)malloc(3072); | |
float* ln_f_bias = (float*)malloc(3072); | |
float* b422 = (float*)malloc(51463168); | |
float* b423 = (float*)malloc(1024); | |
float* b424 = (float*)malloc(1024); | |
float* b425 = (float*)malloc(1024); | |
float* b426 = (float*)malloc(1024); | |
float* b427 = (float*)malloc(1024); | |
float* loss = (float*)malloc(4); | |
float* b429 = (float*)malloc(51463168); | |
float* b430 = (float*)malloc(786432); | |
float* adam_m_ln_f_weight = (float*)malloc(3072); | |
float* adam_v_ln_f_weight = (float*)malloc(3072); | |
float* adam_m_ln_f_bias = (float*)malloc(3072); | |
float* adam_v_ln_f_bias = (float*)malloc(3072); | |
float* adam_lr = (float*)malloc(4); | |
float* b436 = (float*)malloc(786432); | |
float* adam_m_h_11_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_11_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_11_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_11_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b441 = (float*)malloc(3145728); | |
float* adam_m_h_11_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_11_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_11_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_11_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b446 = (float*)malloc(786432); | |
float* adam_m_h_11_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_11_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_11_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_11_ln_2_bias = (float*)malloc(3072); | |
float* b451 = (float*)malloc(786432); | |
float* adam_m_h_11_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_11_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_11_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_11_attn_c_proj_bias = (float*)malloc(3072); | |
float* b456 = (float*)malloc(786432); | |
float* b457 = (float*)malloc(786432); | |
float* b458 = (float*)malloc(12288); | |
float* b459 = (float*)malloc(12288); | |
float* b460 = (float*)malloc(2359296); | |
float* adam_m_h_11_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_11_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_11_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_11_attn_c_attn_bias = (float*)malloc(9216); | |
float* b465 = (float*)malloc(786432); | |
float* adam_m_h_11_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_11_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_11_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_11_ln_1_bias = (float*)malloc(3072); | |
float* b470 = (float*)malloc(786432); | |
float* adam_m_h_10_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_10_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_10_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_10_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b475 = (float*)malloc(3145728); | |
float* adam_m_h_10_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_10_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_10_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_10_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b480 = (float*)malloc(786432); | |
float* adam_m_h_10_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_10_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_10_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_10_ln_2_bias = (float*)malloc(3072); | |
float* b485 = (float*)malloc(786432); | |
float* adam_m_h_10_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_10_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_10_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_10_attn_c_proj_bias = (float*)malloc(3072); | |
float* b490 = (float*)malloc(2359296); | |
float* adam_m_h_10_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_10_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_10_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_10_attn_c_attn_bias = (float*)malloc(9216); | |
float* b495 = (float*)malloc(786432); | |
float* adam_m_h_10_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_10_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_10_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_10_ln_1_bias = (float*)malloc(3072); | |
float* b500 = (float*)malloc(786432); | |
float* adam_m_h_9_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_9_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_9_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_9_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b505 = (float*)malloc(3145728); | |
float* adam_m_h_9_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_9_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_9_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_9_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b510 = (float*)malloc(786432); | |
float* adam_m_h_9_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_9_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_9_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_9_ln_2_bias = (float*)malloc(3072); | |
float* b515 = (float*)malloc(786432); | |
float* adam_m_h_9_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_9_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_9_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_9_attn_c_proj_bias = (float*)malloc(3072); | |
float* b520 = (float*)malloc(2359296); | |
float* adam_m_h_9_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_9_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_9_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_9_attn_c_attn_bias = (float*)malloc(9216); | |
float* b525 = (float*)malloc(786432); | |
float* adam_m_h_9_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_9_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_9_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_9_ln_1_bias = (float*)malloc(3072); | |
float* b530 = (float*)malloc(786432); | |
float* adam_m_h_8_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_8_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_8_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_8_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b535 = (float*)malloc(3145728); | |
float* adam_m_h_8_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_8_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_8_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_8_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b540 = (float*)malloc(786432); | |
float* adam_m_h_8_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_8_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_8_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_8_ln_2_bias = (float*)malloc(3072); | |
float* b545 = (float*)malloc(786432); | |
float* adam_m_h_8_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_8_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_8_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_8_attn_c_proj_bias = (float*)malloc(3072); | |
float* b550 = (float*)malloc(2359296); | |
float* adam_m_h_8_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_8_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_8_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_8_attn_c_attn_bias = (float*)malloc(9216); | |
float* b555 = (float*)malloc(786432); | |
float* adam_m_h_8_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_8_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_8_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_8_ln_1_bias = (float*)malloc(3072); | |
float* b560 = (float*)malloc(786432); | |
float* adam_m_h_7_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_7_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_7_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_7_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b565 = (float*)malloc(3145728); | |
float* adam_m_h_7_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_7_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_7_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_7_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b570 = (float*)malloc(786432); | |
float* adam_m_h_7_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_7_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_7_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_7_ln_2_bias = (float*)malloc(3072); | |
float* b575 = (float*)malloc(786432); | |
float* adam_m_h_7_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_7_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_7_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_7_attn_c_proj_bias = (float*)malloc(3072); | |
float* b580 = (float*)malloc(2359296); | |
float* adam_m_h_7_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_7_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_7_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_7_attn_c_attn_bias = (float*)malloc(9216); | |
float* b585 = (float*)malloc(786432); | |
float* adam_m_h_7_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_7_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_7_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_7_ln_1_bias = (float*)malloc(3072); | |
float* b590 = (float*)malloc(786432); | |
float* adam_m_h_6_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_6_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_6_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_6_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b595 = (float*)malloc(3145728); | |
float* adam_m_h_6_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_6_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_6_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_6_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b600 = (float*)malloc(786432); | |
float* adam_m_h_6_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_6_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_6_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_6_ln_2_bias = (float*)malloc(3072); | |
float* b605 = (float*)malloc(786432); | |
float* adam_m_h_6_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_6_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_6_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_6_attn_c_proj_bias = (float*)malloc(3072); | |
float* b610 = (float*)malloc(2359296); | |
float* adam_m_h_6_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_6_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_6_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_6_attn_c_attn_bias = (float*)malloc(9216); | |
float* b615 = (float*)malloc(786432); | |
float* adam_m_h_6_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_6_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_6_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_6_ln_1_bias = (float*)malloc(3072); | |
float* b620 = (float*)malloc(786432); | |
float* adam_m_h_5_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_5_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_5_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_5_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b625 = (float*)malloc(3145728); | |
float* adam_m_h_5_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_5_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_5_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_5_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b630 = (float*)malloc(786432); | |
float* adam_m_h_5_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_5_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_5_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_5_ln_2_bias = (float*)malloc(3072); | |
float* b635 = (float*)malloc(786432); | |
float* adam_m_h_5_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_5_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_5_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_5_attn_c_proj_bias = (float*)malloc(3072); | |
float* b640 = (float*)malloc(2359296); | |
float* adam_m_h_5_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_5_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_5_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_5_attn_c_attn_bias = (float*)malloc(9216); | |
float* b645 = (float*)malloc(786432); | |
float* adam_m_h_5_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_5_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_5_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_5_ln_1_bias = (float*)malloc(3072); | |
float* b650 = (float*)malloc(786432); | |
float* adam_m_h_4_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_4_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_4_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_4_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b655 = (float*)malloc(3145728); | |
float* adam_m_h_4_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_4_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_4_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_4_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b660 = (float*)malloc(786432); | |
float* adam_m_h_4_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_4_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_4_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_4_ln_2_bias = (float*)malloc(3072); | |
float* b665 = (float*)malloc(786432); | |
float* adam_m_h_4_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_4_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_4_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_4_attn_c_proj_bias = (float*)malloc(3072); | |
float* b670 = (float*)malloc(2359296); | |
float* adam_m_h_4_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_4_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_4_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_4_attn_c_attn_bias = (float*)malloc(9216); | |
float* b675 = (float*)malloc(786432); | |
float* adam_m_h_4_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_4_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_4_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_4_ln_1_bias = (float*)malloc(3072); | |
float* b680 = (float*)malloc(786432); | |
float* adam_m_h_3_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_3_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_3_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_3_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b685 = (float*)malloc(3145728); | |
float* adam_m_h_3_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_3_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_3_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_3_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b690 = (float*)malloc(786432); | |
float* adam_m_h_3_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_3_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_3_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_3_ln_2_bias = (float*)malloc(3072); | |
float* b695 = (float*)malloc(786432); | |
float* adam_m_h_3_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_3_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_3_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_3_attn_c_proj_bias = (float*)malloc(3072); | |
float* b700 = (float*)malloc(2359296); | |
float* adam_m_h_3_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_3_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_3_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_3_attn_c_attn_bias = (float*)malloc(9216); | |
float* b705 = (float*)malloc(786432); | |
float* adam_m_h_3_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_3_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_3_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_3_ln_1_bias = (float*)malloc(3072); | |
float* b710 = (float*)malloc(786432); | |
float* adam_m_h_2_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_2_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_2_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_2_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b715 = (float*)malloc(3145728); | |
float* adam_m_h_2_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_2_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_2_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_2_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b720 = (float*)malloc(786432); | |
float* adam_m_h_2_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_2_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_2_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_2_ln_2_bias = (float*)malloc(3072); | |
float* b725 = (float*)malloc(786432); | |
float* adam_m_h_2_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_2_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_2_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_2_attn_c_proj_bias = (float*)malloc(3072); | |
float* b730 = (float*)malloc(2359296); | |
float* adam_m_h_2_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_2_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_2_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_2_attn_c_attn_bias = (float*)malloc(9216); | |
float* b735 = (float*)malloc(786432); | |
float* adam_m_h_2_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_2_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_2_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_2_ln_1_bias = (float*)malloc(3072); | |
float* b740 = (float*)malloc(786432); | |
float* adam_m_h_1_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_1_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_1_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_1_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b745 = (float*)malloc(3145728); | |
float* adam_m_h_1_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_1_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_1_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_1_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b750 = (float*)malloc(786432); | |
float* adam_m_h_1_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_1_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_1_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_1_ln_2_bias = (float*)malloc(3072); | |
float* b755 = (float*)malloc(786432); | |
float* adam_m_h_1_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_1_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_1_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_1_attn_c_proj_bias = (float*)malloc(3072); | |
float* b760 = (float*)malloc(2359296); | |
float* adam_m_h_1_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_1_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_1_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_1_attn_c_attn_bias = (float*)malloc(9216); | |
float* b765 = (float*)malloc(786432); | |
float* adam_m_h_1_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_1_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_1_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_1_ln_1_bias = (float*)malloc(3072); | |
float* b770 = (float*)malloc(786432); | |
float* adam_m_h_0_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_v_h_0_mlp_c_proj_weight = (float*)malloc(9437184); | |
float* adam_m_h_0_mlp_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_0_mlp_c_proj_bias = (float*)malloc(3072); | |
float* b775 = (float*)malloc(3145728); | |
float* adam_m_h_0_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_v_h_0_mlp_c_fc_weight = (float*)malloc(9437184); | |
float* adam_m_h_0_mlp_c_fc_bias = (float*)malloc(12288); | |
float* adam_v_h_0_mlp_c_fc_bias = (float*)malloc(12288); | |
float* b780 = (float*)malloc(786432); | |
float* adam_m_h_0_ln_2_weight = (float*)malloc(3072); | |
float* adam_v_h_0_ln_2_weight = (float*)malloc(3072); | |
float* adam_m_h_0_ln_2_bias = (float*)malloc(3072); | |
float* adam_v_h_0_ln_2_bias = (float*)malloc(3072); | |
float* b785 = (float*)malloc(786432); | |
float* adam_m_h_0_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_v_h_0_attn_c_proj_weight = (float*)malloc(2359296); | |
float* adam_m_h_0_attn_c_proj_bias = (float*)malloc(3072); | |
float* adam_v_h_0_attn_c_proj_bias = (float*)malloc(3072); | |
float* b790 = (float*)malloc(2359296); | |
float* adam_m_h_0_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_v_h_0_attn_c_attn_weight = (float*)malloc(7077888); | |
float* adam_m_h_0_attn_c_attn_bias = (float*)malloc(9216); | |
float* adam_v_h_0_attn_c_attn_bias = (float*)malloc(9216); | |
float* b795 = (float*)malloc(786432); | |
float* adam_m_h_0_ln_1_weight = (float*)malloc(3072); | |
float* adam_v_h_0_ln_1_weight = (float*)malloc(3072); | |
float* adam_m_h_0_ln_1_bias = (float*)malloc(3072); | |
float* adam_v_h_0_ln_1_bias = (float*)malloc(3072); | |
float* b800 = (float*)malloc(196608); | |
float* b801 = (float*)malloc(154389504); | |
float* adam_m_wpe_weight = (float*)malloc(3145728); | |
float* adam_v_wpe_weight = (float*)malloc(3145728); | |
float* grad_lm_head_weight = (float*)malloc(154389504); | |
float* adam_m_lm_head_weight = (float*)malloc(154389504); | |
float* adam_v_lm_head_weight = (float*)malloc(154389504); | |
E_(adam_t); | |
r_64_64(b2); | |
E_64_64(b3, h_0_attn_bias); | |
E_64_64(b5, h_1_attn_bias); | |
E_64_64(b7, h_2_attn_bias); | |
E_64_64(b9, h_3_attn_bias); | |
E_64_64(b11, h_4_attn_bias); | |
E_64_64(b13, h_5_attn_bias); | |
E_64_64(b15, h_6_attn_bias); | |
E_64_64(b17, h_7_attn_bias); | |
E_64_64(b19, h_8_attn_bias); | |
E_64_64(b21, h_9_attn_bias); | |
E_64_64(b23, h_10_attn_bias); | |
E_64_64(b25, h_11_attn_bias); | |
r_50257_50257(b28); | |
E_n1(b29, adam_b1); | |
E_n1(b31, adam_b2); | |
E_n2(b33, adam_b1, adam_t); | |
E_n2(b34, adam_b2, adam_t); | |
r_64_768_1024(b35, b2, wpe_arange, wpe_weight); | |
E_256(b38, Y); | |
r_256(b39, Y); | |
r_4_64_768_50257(b40, X, wte_arange, lm_head_weight, b35); | |
E_n3(b43, b39); | |
r_256_768(b44, b40); | |
r_256_768n1(b45, b40, b44); | |
E_256n1(b46, b45); | |
E_256_768(b47, b40, b44, b46, h_0_ln_1_weight, h_0_ln_1_bias); | |
r_256_2304_768(b50, b47, h_0_attn_c_attn_weight, h_0_attn_c_attn_bias); | |
r_4_12_64_64_64(b53, b3, b50); | |
r_3072_64(b54, b53); | |
r_3072_64n1(b55, b53, b54); | |
r_3072_64n2(b56, b53, b54); | |
E_3072_64(b57, b53, b54, b55); | |
r_4_12_64_64_64n1(b58, b57, b50); | |
r_4_64_768_768(b59, b40, b58, h_0_attn_c_proj_weight, h_0_attn_c_proj_bias); | |
r_256_768(b62, b59); | |
r_256_768n1(b63, b59, b62); | |
E_256n1(b64, b63); | |
E_256_768(b65, b59, b62, b64, h_0_ln_2_weight, h_0_ln_2_bias); | |
r_256_3072_768(b68, b65, h_0_mlp_c_fc_weight, h_0_mlp_c_fc_bias); | |
E_786432(b71, b68); | |
r_256_768_3072(b72, b59, b71, h_0_mlp_c_proj_weight, h_0_mlp_c_proj_bias); | |
r_256_768(b75, b72); | |
r_256_768n1(b76, b72, b75); | |
E_256n1(b77, b76); | |
E_256_768(b78, b72, b75, b77, h_1_ln_1_weight, h_1_ln_1_bias); | |
r_256_2304_768(b81, b78, h_1_attn_c_attn_weight, h_1_attn_c_attn_bias); | |
r_4_12_64_64_64(b84, b5, b81); | |
r_3072_64(b85, b84); | |
r_3072_64n1(b86, b84, b85); | |
r_3072_64n2(b87, b84, b85); | |
E_3072_64(b88, b84, b85, b86); | |
r_4_12_64_64_64n1(b89, b88, b81); | |
r_4_64_768_768(b90, b72, b89, h_1_attn_c_proj_weight, h_1_attn_c_proj_bias); | |
r_256_768(b93, b90); | |
r_256_768n1(b94, b90, b93); | |
E_256n1(b95, b94); | |
E_256_768(b96, b90, b93, b95, h_1_ln_2_weight, h_1_ln_2_bias); | |
r_256_3072_768(b99, b96, h_1_mlp_c_fc_weight, h_1_mlp_c_fc_bias); | |
E_786432(b102, b99); | |
r_256_768_3072(b103, b90, b102, h_1_mlp_c_proj_weight, h_1_mlp_c_proj_bias); | |
r_256_768(b106, b103); | |
r_256_768n1(b107, b103, b106); | |
E_256n1(b108, b107); | |
E_256_768(b109, b103, b106, b108, h_2_ln_1_weight, h_2_ln_1_bias); | |
r_256_2304_768(b112, b109, h_2_attn_c_attn_weight, h_2_attn_c_attn_bias); | |
r_4_12_64_64_64(b115, b7, b112); | |
r_3072_64(b116, b115); | |
r_3072_64n1(b117, b115, b116); | |
r_3072_64n2(b118, b115, b116); | |
E_3072_64(b119, b115, b116, b117); | |
r_4_12_64_64_64n1(b120, b119, b112); | |
r_4_64_768_768(b121, b103, b120, h_2_attn_c_proj_weight, h_2_attn_c_proj_bias); | |
r_256_768(b124, b121); | |
r_256_768n1(b125, b121, b124); | |
E_256n1(b126, b125); | |
E_256_768(b127, b121, b124, b126, h_2_ln_2_weight, h_2_ln_2_bias); | |
r_256_3072_768(b130, b127, h_2_mlp_c_fc_weight, h_2_mlp_c_fc_bias); | |
E_786432(b133, b130); | |
r_256_768_3072(b134, b121, b133, h_2_mlp_c_proj_weight, h_2_mlp_c_proj_bias); | |
r_256_768(b137, b134); | |
r_256_768n1(b138, b134, b137); | |
E_256n1(b139, b138); | |
E_256_768(b140, b134, b137, b139, h_3_ln_1_weight, h_3_ln_1_bias); | |
r_256_2304_768(b143, b140, h_3_attn_c_attn_weight, h_3_attn_c_attn_bias); | |
r_4_12_64_64_64(b146, b9, b143); | |
r_3072_64(b147, b146); | |
r_3072_64n1(b148, b146, b147); | |
r_3072_64n2(b149, b146, b147); | |
E_3072_64(b150, b146, b147, b148); | |
r_4_12_64_64_64n1(b151, b150, b143); | |
r_4_64_768_768(b152, b134, b151, h_3_attn_c_proj_weight, h_3_attn_c_proj_bias); | |
r_256_768(b155, b152); | |
r_256_768n1(b156, b152, b155); | |
E_256n1(b157, b156); | |
E_256_768(b158, b152, b155, b157, h_3_ln_2_weight, h_3_ln_2_bias); | |
r_256_3072_768(b161, b158, h_3_mlp_c_fc_weight, h_3_mlp_c_fc_bias); | |
E_786432(b164, b161); | |
r_256_768_3072(b165, b152, b164, h_3_mlp_c_proj_weight, h_3_mlp_c_proj_bias); | |
r_256_768(b168, b165); | |
r_256_768n1(b169, b165, b168); | |
E_256n1(b170, b169); | |
E_256_768(b171, b165, b168, b170, h_4_ln_1_weight, h_4_ln_1_bias); | |
r_256_2304_768(b174, b171, h_4_attn_c_attn_weight, h_4_attn_c_attn_bias); | |
r_4_12_64_64_64(b177, b11, b174); | |
r_3072_64(b178, b177); | |
r_3072_64n1(b179, b177, b178); | |
r_3072_64n2(b180, b177, b178); | |
E_3072_64(b181, b177, b178, b179); | |
r_4_12_64_64_64n1(b182, b181, b174); | |
r_4_64_768_768(b183, b165, b182, h_4_attn_c_proj_weight, h_4_attn_c_proj_bias); | |
r_256_768(b186, b183); | |
r_256_768n1(b187, b183, b186); | |
E_256n1(b188, b187); | |
E_256_768(b189, b183, b186, b188, h_4_ln_2_weight, h_4_ln_2_bias); | |
r_256_3072_768(b192, b189, h_4_mlp_c_fc_weight, h_4_mlp_c_fc_bias); | |
E_786432(b195, b192); | |
r_256_768_3072(b196, b183, b195, h_4_mlp_c_proj_weight, h_4_mlp_c_proj_bias); | |
r_256_768(b199, b196); | |
r_256_768n1(b200, b196, b199); | |
E_256n1(b201, b200); | |
E_256_768(b202, b196, b199, b201, h_5_ln_1_weight, h_5_ln_1_bias); | |
r_256_2304_768(b205, b202, h_5_attn_c_attn_weight, h_5_attn_c_attn_bias); | |
r_4_12_64_64_64(b208, b13, b205); | |
r_3072_64(b209, b208); | |
r_3072_64n1(b210, b208, b209); | |
r_3072_64n2(b211, b208, b209); | |
E_3072_64(b212, b208, b209, b210); | |
r_4_12_64_64_64n1(b213, b212, b205); | |
r_4_64_768_768(b214, b196, b213, h_5_attn_c_proj_weight, h_5_attn_c_proj_bias); | |
r_256_768(b217, b214); | |
r_256_768n1(b218, b214, b217); | |
E_256n1(b219, b218); | |
E_256_768(b220, b214, b217, b219, h_5_ln_2_weight, h_5_ln_2_bias); | |
r_256_3072_768(b223, b220, h_5_mlp_c_fc_weight, h_5_mlp_c_fc_bias); | |
E_786432(b226, b223); | |
r_256_768_3072(b227, b214, b226, h_5_mlp_c_proj_weight, h_5_mlp_c_proj_bias); | |
r_256_768(b230, b227); | |
r_256_768n1(b231, b227, b230); | |
E_256n1(b232, b231); | |
E_256_768(b233, b227, b230, b232, h_6_ln_1_weight, h_6_ln_1_bias); | |
r_256_2304_768(b236, b233, h_6_attn_c_attn_weight, h_6_attn_c_attn_bias); | |
r_4_12_64_64_64(b239, b15, b236); | |
r_3072_64(b240, b239); | |
r_3072_64n1(b241, b239, b240); | |
r_3072_64n2(b242, b239, b240); | |
E_3072_64(b243, b239, b240, b241); | |
r_4_12_64_64_64n1(b244, b243, b236); | |
r_4_64_768_768(b245, b227, b244, h_6_attn_c_proj_weight, h_6_attn_c_proj_bias); | |
r_256_768(b248, b245); | |
r_256_768n1(b249, b245, b248); | |
E_256n1(b250, b249); | |
E_256_768(b251, b245, b248, b250, h_6_ln_2_weight, h_6_ln_2_bias); | |
r_256_3072_768(b254, b251, h_6_mlp_c_fc_weight, h_6_mlp_c_fc_bias); | |
E_786432(b257, b254); | |
r_256_768_3072(b258, b245, b257, h_6_mlp_c_proj_weight, h_6_mlp_c_proj_bias); | |
r_256_768(b261, b258); | |
r_256_768n1(b262, b258, b261); | |
E_256n1(b263, b262); | |
E_256_768(b264, b258, b261, b263, h_7_ln_1_weight, h_7_ln_1_bias); | |
r_256_2304_768(b267, b264, h_7_attn_c_attn_weight, h_7_attn_c_attn_bias); | |
r_4_12_64_64_64(b270, b17, b267); | |
r_3072_64(b271, b270); | |
r_3072_64n1(b272, b270, b271); | |
r_3072_64n2(b273, b270, b271); | |
E_3072_64(b274, b270, b271, b272); | |
r_4_12_64_64_64n1(b275, b274, b267); | |
r_4_64_768_768(b276, b258, b275, h_7_attn_c_proj_weight, h_7_attn_c_proj_bias); | |
r_256_768(b279, b276); | |
r_256_768n1(b280, b276, b279); | |
E_256n1(b281, b280); | |
E_256_768(b282, b276, b279, b281, h_7_ln_2_weight, h_7_ln_2_bias); | |
r_256_3072_768(b285, b282, h_7_mlp_c_fc_weight, h_7_mlp_c_fc_bias); | |
E_786432(b288, b285); | |
r_256_768_3072(b289, b276, b288, h_7_mlp_c_proj_weight, h_7_mlp_c_proj_bias); | |
r_256_768(b292, b289); | |
r_256_768n1(b293, b289, b292); | |
E_256n1(b294, b293); | |
E_256_768(b295, b289, b292, b294, h_8_ln_1_weight, h_8_ln_1_bias); | |
r_256_2304_768(b298, b295, h_8_attn_c_attn_weight, h_8_attn_c_attn_bias); | |
r_4_12_64_64_64(b301, b19, b298); | |
r_3072_64(b302, b301); | |
r_3072_64n1(b303, b301, b302); | |
r_3072_64n2(b304, b301, b302); | |
E_3072_64(b305, b301, b302, b303); | |
r_4_12_64_64_64n1(b306, b305, b298); | |
r_4_64_768_768(b307, b289, b306, h_8_attn_c_proj_weight, h_8_attn_c_proj_bias); | |
r_256_768(b310, b307); | |
r_256_768n1(b311, b307, b310); | |
E_256n1(b312, b311); | |
E_256_768(b313, b307, b310, b312, h_8_ln_2_weight, h_8_ln_2_bias); | |
r_256_3072_768(b316, b313, h_8_mlp_c_fc_weight, h_8_mlp_c_fc_bias); | |
E_786432(b319, b316); | |
r_256_768_3072(b320, b307, b319, h_8_mlp_c_proj_weight, h_8_mlp_c_proj_bias); | |
r_256_768(b323, b320); | |
r_256_768n1(b324, b320, b323); | |
E_256n1(b325, b324); | |
E_256_768(b326, b320, b323, b325, h_9_ln_1_weight, h_9_ln_1_bias); | |
r_256_2304_768(b329, b326, h_9_attn_c_attn_weight, h_9_attn_c_attn_bias); | |
r_4_12_64_64_64(b332, b21, b329); | |
r_3072_64(b333, b332); | |
r_3072_64n1(b334, b332, b333); | |
r_3072_64n2(b335, b332, b333); | |
E_3072_64(b336, b332, b333, b334); | |
r_4_12_64_64_64n1(b337, b336, b329); | |
r_4_64_768_768(b338, b320, b337, h_9_attn_c_proj_weight, h_9_attn_c_proj_bias); | |
r_256_768(b341, b338); | |
r_256_768n1(b342, b338, b341); | |
E_256n1(b343, b342); | |
E_256_768(b344, b338, b341, b343, h_9_ln_2_weight, h_9_ln_2_bias); | |
r_256_3072_768(b347, b344, h_9_mlp_c_fc_weight, h_9_mlp_c_fc_bias); | |
E_786432(b350, b347); | |
r_256_768_3072(b351, b338, b350, h_9_mlp_c_proj_weight, h_9_mlp_c_proj_bias); | |
r_256_768(b354, b351); | |
r_256_768n1(b355, b351, b354); | |
E_256n1(b356, b355); | |
E_256_768(b357, b351, b354, b356, h_10_ln_1_weight, h_10_ln_1_bias); | |
r_256_2304_768(b360, b357, h_10_attn_c_attn_weight, h_10_attn_c_attn_bias); | |
r_4_12_64_64_64(b363, b23, b360); | |
r_3072_64(b364, b363); | |
r_3072_64n1(b365, b363, b364); | |
r_3072_64n2(b366, b363, b364); | |
E_3072_64(b367, b363, b364, b365); | |
r_4_12_64_64_64n1(b368, b367, b360); | |
r_4_64_768_768(b369, b351, b368, h_10_attn_c_proj_weight, h_10_attn_c_proj_bias); | |
r_256_768(b372, b369); | |
r_256_768n1(b373, b369, b372); | |
E_256n1(b374, b373); | |
E_256_768(b375, b369, b372, b374, h_10_ln_2_weight, h_10_ln_2_bias); | |
r_256_3072_768(b378, b375, h_10_mlp_c_fc_weight, h_10_mlp_c_fc_bias); | |
E_786432(b381, b378); | |
r_256_768_3072(b382, b369, b381, h_10_mlp_c_proj_weight, h_10_mlp_c_proj_bias); | |
r_256_768(b385, b382); | |
r_256_768n1(b386, b382, b385); | |
E_256n1(b387, b386); | |
E_256_768(b388, b382, b385, b387, h_11_ln_1_weight, h_11_ln_1_bias); | |
r_256_2304_768(b391, b388, h_11_attn_c_attn_weight, h_11_attn_c_attn_bias); | |
r_4_12_64_64_64(b394, b25, b391); | |
r_3072_64(b395, b394); | |
r_3072_64n1(b396, b394, b395); | |
r_3072_64n2(b397, b394, b395); | |
E_3072_64(b398, b394, b395, b396); | |
r_4_12_64_64_64n1(b399, b398, b391); | |
r_4_64_768_768(b400, b382, b399, h_11_attn_c_proj_weight, h_11_attn_c_proj_bias); | |
r_256_768(b403, b400); | |
r_256_768n1(b404, b400, b403); | |
E_256n1(b405, b404); | |
E_256_768(b406, b400, b403, b405, h_11_ln_2_weight, h_11_ln_2_bias); | |
r_256_3072_768(b409, b406, h_11_mlp_c_fc_weight, h_11_mlp_c_fc_bias); | |
E_786432(b412, b409); | |
r_256_768_3072(b413, b400, b412, h_11_mlp_c_proj_weight, h_11_mlp_c_proj_bias); | |
r_256_768(b416, b413); | |
r_256_768n1(b417, b413, b416); | |
E_256n1(b418, b417); | |
E_256_768(b419, b413, b416, b418, ln_f_weight, ln_f_bias); | |
r_256_50257_768(b422, b419, lm_head_weight); | |
r_256_50257(b423, b422); | |
r_256_50257n1(b424, b422, b423); | |
r_256_50257n2(b425, b422, b423); | |
E_256n2(b426, b425); | |
r_256_50257n3(b427, Y, b28, b38, b43, b425); | |
r_256_50257n4(loss, b422, b423, b426, Y, b28, b38, b39); | |
r_256_50257n5(b426, Y, b28, b38, b43, b422, b423, b427); | |
E_256_50257(b429, Y, b28, b38, b43, b422, b423, b427, b424, b426); | |
r_256_768_50257(b430, lm_head_weight, b429); | |
r2_768_256(adam_m_ln_f_weight, adam_v_ln_f_weight, adam_b1, b29, b413, b416, b418, b430, adam_b2, b31); | |
r2_768_256n1(adam_m_ln_f_bias, adam_v_ln_f_bias, adam_b1, b29, b430, adam_b2, b31); | |
r_256_768n2(b426, b413, b416, ln_f_weight, b430, b418, b417); | |
E_768(ln_f_bias, adam_lr, adam_m_ln_f_bias, b33, adam_v_ln_f_bias, b34); | |
r_256_768n3(b417, b418, ln_f_weight, b430, b413, b416, b426); | |
E_256_768n1(b436, b418, ln_f_weight, b430, b413, b416, b426, b417); | |
r2_768_3072_256(adam_m_h_11_mlp_c_proj_weight, adam_v_h_11_mlp_c_proj_weight, adam_b1, b29, b412, b436, adam_b2, b31); | |
r2_768_256n1(adam_m_h_11_mlp_c_proj_bias, adam_v_h_11_mlp_c_proj_bias, adam_b1, b29, b436, adam_b2, b31); | |
r_256_3072_768n1(b441, b409, h_11_mlp_c_proj_weight, b436); | |
E_768(ln_f_weight, adam_lr, adam_m_ln_f_weight, b33, adam_v_ln_f_weight, b34); | |
E_768(h_11_mlp_c_proj_bias, adam_lr, adam_m_h_11_mlp_c_proj_bias, b33, adam_v_h_11_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_11_mlp_c_fc_weight, adam_v_h_11_mlp_c_fc_weight, adam_b1, b29, b406, b441, adam_b2, b31); | |
r2_3072_256(adam_m_h_11_mlp_c_fc_bias, adam_v_h_11_mlp_c_fc_bias, adam_b1, b29, b441, adam_b2, b31); | |
E_2359296(h_11_mlp_c_proj_weight, adam_lr, adam_m_h_11_mlp_c_proj_weight, b33, adam_v_h_11_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b446, h_11_mlp_c_fc_weight, b441); | |
E_3072(h_11_mlp_c_fc_bias, adam_lr, adam_m_h_11_mlp_c_fc_bias, b33, adam_v_h_11_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_11_ln_2_weight, adam_v_h_11_ln_2_weight, adam_b1, b29, b400, b403, b405, b446, adam_b2, b31); | |
r2_768_256n1(adam_m_h_11_ln_2_bias, adam_v_h_11_ln_2_bias, adam_b1, b29, b446, adam_b2, b31); | |
r_256_768n2(b417, b400, b403, h_11_ln_2_weight, b446, b405, b404); | |
E_2359296n1(h_11_mlp_c_fc_weight, adam_lr, adam_m_h_11_mlp_c_fc_weight, b33, adam_v_h_11_mlp_c_fc_weight, b34); | |
E_768(h_11_ln_2_bias, adam_lr, adam_m_h_11_ln_2_bias, b33, adam_v_h_11_ln_2_bias, b34); | |
r_256_768n3(b404, b405, h_11_ln_2_weight, b446, b400, b403, b417); | |
E_256_768n2(b451, b436, b405, h_11_ln_2_weight, b446, b400, b403, b417, b404); | |
r2_768_768_4_64(adam_m_h_11_attn_c_proj_weight, adam_v_h_11_attn_c_proj_weight, adam_b1, b29, b399, b451, adam_b2, b31); | |
r2_768_256n1(adam_m_h_11_attn_c_proj_bias, adam_v_h_11_attn_c_proj_bias, adam_b1, b29, b451, adam_b2, b31); | |
r_256_768_768(b456, h_11_attn_c_proj_weight, b451); | |
E_768(h_11_ln_2_weight, adam_lr, adam_m_h_11_ln_2_weight, b33, adam_v_h_11_ln_2_weight, b34); | |
E_768(h_11_attn_c_proj_bias, adam_lr, adam_m_h_11_attn_c_proj_bias, b33, adam_v_h_11_attn_c_proj_bias, b34); | |
E_589824(h_11_attn_c_proj_weight, adam_lr, adam_m_h_11_attn_c_proj_weight, b33, adam_v_h_11_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b457, b398, b456); | |
r_4_12_64_64_64n3(b398, b391, b456); | |
r_3072_64n3(b458, b398, b394, b395, b396); | |
r_3072_64n4(b459, b394, b395, b398, b396, b458); | |
E_48_64_64(b456, b25, b394, b395, b398, b396, b458, b397, b459); | |
r_4_12_64_64_64n4(b398, b391, b456); | |
r_4_12_64_64_64n5(b394, b391, b456); | |
E_4_64_2304(b460, b457, b398, b394); | |
r2_2304_768_256(adam_m_h_11_attn_c_attn_weight, adam_v_h_11_attn_c_attn_weight, adam_b1, b29, b388, b460, adam_b2, b31); | |
r2_2304_256(adam_m_h_11_attn_c_attn_bias, adam_v_h_11_attn_c_attn_bias, adam_b1, b29, b460, adam_b2, b31); | |
r_256_768_2304(b465, h_11_attn_c_attn_weight, b460); | |
E_2304(h_11_attn_c_attn_bias, adam_lr, adam_m_h_11_attn_c_attn_bias, b33, adam_v_h_11_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_11_ln_1_weight, adam_v_h_11_ln_1_weight, adam_b1, b29, b382, b385, b387, b465, adam_b2, b31); | |
r2_768_256n1(adam_m_h_11_ln_1_bias, adam_v_h_11_ln_1_bias, adam_b1, b29, b465, adam_b2, b31); | |
r_256_768n2(b404, b382, b385, h_11_ln_1_weight, b465, b387, b386); | |
E_1769472(h_11_attn_c_attn_weight, adam_lr, adam_m_h_11_attn_c_attn_weight, b33, adam_v_h_11_attn_c_attn_weight, b34); | |
E_768(h_11_ln_1_bias, adam_lr, adam_m_h_11_ln_1_bias, b33, adam_v_h_11_ln_1_bias, b34); | |
r_256_768n3(b386, b387, h_11_ln_1_weight, b465, b382, b385, b404); | |
E_256_768n2(b470, b451, b387, h_11_ln_1_weight, b465, b382, b385, b404, b386); | |
r2_768_3072_256(adam_m_h_10_mlp_c_proj_weight, adam_v_h_10_mlp_c_proj_weight, adam_b1, b29, b381, b470, adam_b2, b31); | |
r2_768_256n1(adam_m_h_10_mlp_c_proj_bias, adam_v_h_10_mlp_c_proj_bias, adam_b1, b29, b470, adam_b2, b31); | |
r_256_3072_768n1(b475, b378, h_10_mlp_c_proj_weight, b470); | |
E_768(h_11_ln_1_weight, adam_lr, adam_m_h_11_ln_1_weight, b33, adam_v_h_11_ln_1_weight, b34); | |
E_768(h_10_mlp_c_proj_bias, adam_lr, adam_m_h_10_mlp_c_proj_bias, b33, adam_v_h_10_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_10_mlp_c_fc_weight, adam_v_h_10_mlp_c_fc_weight, adam_b1, b29, b375, b475, adam_b2, b31); | |
r2_3072_256(adam_m_h_10_mlp_c_fc_bias, adam_v_h_10_mlp_c_fc_bias, adam_b1, b29, b475, adam_b2, b31); | |
E_2359296(h_10_mlp_c_proj_weight, adam_lr, adam_m_h_10_mlp_c_proj_weight, b33, adam_v_h_10_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b480, h_10_mlp_c_fc_weight, b475); | |
E_3072(h_10_mlp_c_fc_bias, adam_lr, adam_m_h_10_mlp_c_fc_bias, b33, adam_v_h_10_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_10_ln_2_weight, adam_v_h_10_ln_2_weight, adam_b1, b29, b369, b372, b374, b480, adam_b2, b31); | |
r2_768_256n1(adam_m_h_10_ln_2_bias, adam_v_h_10_ln_2_bias, adam_b1, b29, b480, adam_b2, b31); | |
r_256_768n2(b386, b369, b372, h_10_ln_2_weight, b480, b374, b373); | |
E_2359296n1(h_10_mlp_c_fc_weight, adam_lr, adam_m_h_10_mlp_c_fc_weight, b33, adam_v_h_10_mlp_c_fc_weight, b34); | |
E_768(h_10_ln_2_bias, adam_lr, adam_m_h_10_ln_2_bias, b33, adam_v_h_10_ln_2_bias, b34); | |
r_256_768n3(b373, b374, h_10_ln_2_weight, b480, b369, b372, b386); | |
E_256_768n2(b485, b470, b374, h_10_ln_2_weight, b480, b369, b372, b386, b373); | |
r2_768_768_4_64(adam_m_h_10_attn_c_proj_weight, adam_v_h_10_attn_c_proj_weight, adam_b1, b29, b368, b485, adam_b2, b31); | |
r2_768_256n1(adam_m_h_10_attn_c_proj_bias, adam_v_h_10_attn_c_proj_bias, adam_b1, b29, b485, adam_b2, b31); | |
r_256_768_768(b394, h_10_attn_c_proj_weight, b485); | |
E_768(h_10_ln_2_weight, adam_lr, adam_m_h_10_ln_2_weight, b33, adam_v_h_10_ln_2_weight, b34); | |
E_768(h_10_attn_c_proj_bias, adam_lr, adam_m_h_10_attn_c_proj_bias, b33, adam_v_h_10_attn_c_proj_bias, b34); | |
E_589824(h_10_attn_c_proj_weight, adam_lr, adam_m_h_10_attn_c_proj_weight, b33, adam_v_h_10_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b398, b367, b394); | |
r_4_12_64_64_64n3(b367, b360, b394); | |
r_3072_64n3(b459, b367, b363, b364, b365); | |
r_3072_64n4(b397, b363, b364, b367, b365, b459); | |
E_48_64_64(b394, b23, b363, b364, b367, b365, b459, b366, b397); | |
r_4_12_64_64_64n4(b367, b360, b394); | |
r_4_12_64_64_64n5(b363, b360, b394); | |
E_4_64_2304(b490, b398, b367, b363); | |
r2_2304_768_256(adam_m_h_10_attn_c_attn_weight, adam_v_h_10_attn_c_attn_weight, adam_b1, b29, b357, b490, adam_b2, b31); | |
r2_2304_256(adam_m_h_10_attn_c_attn_bias, adam_v_h_10_attn_c_attn_bias, adam_b1, b29, b490, adam_b2, b31); | |
r_256_768_2304(b495, h_10_attn_c_attn_weight, b490); | |
E_2304(h_10_attn_c_attn_bias, adam_lr, adam_m_h_10_attn_c_attn_bias, b33, adam_v_h_10_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_10_ln_1_weight, adam_v_h_10_ln_1_weight, adam_b1, b29, b351, b354, b356, b495, adam_b2, b31); | |
r2_768_256n1(adam_m_h_10_ln_1_bias, adam_v_h_10_ln_1_bias, adam_b1, b29, b495, adam_b2, b31); | |
r_256_768n2(b373, b351, b354, h_10_ln_1_weight, b495, b356, b355); | |
E_1769472(h_10_attn_c_attn_weight, adam_lr, adam_m_h_10_attn_c_attn_weight, b33, adam_v_h_10_attn_c_attn_weight, b34); | |
E_768(h_10_ln_1_bias, adam_lr, adam_m_h_10_ln_1_bias, b33, adam_v_h_10_ln_1_bias, b34); | |
r_256_768n3(b355, b356, h_10_ln_1_weight, b495, b351, b354, b373); | |
E_256_768n2(b500, b485, b356, h_10_ln_1_weight, b495, b351, b354, b373, b355); | |
r2_768_3072_256(adam_m_h_9_mlp_c_proj_weight, adam_v_h_9_mlp_c_proj_weight, adam_b1, b29, b350, b500, adam_b2, b31); | |
r2_768_256n1(adam_m_h_9_mlp_c_proj_bias, adam_v_h_9_mlp_c_proj_bias, adam_b1, b29, b500, adam_b2, b31); | |
r_256_3072_768n1(b505, b347, h_9_mlp_c_proj_weight, b500); | |
E_768(h_10_ln_1_weight, adam_lr, adam_m_h_10_ln_1_weight, b33, adam_v_h_10_ln_1_weight, b34); | |
E_768(h_9_mlp_c_proj_bias, adam_lr, adam_m_h_9_mlp_c_proj_bias, b33, adam_v_h_9_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_9_mlp_c_fc_weight, adam_v_h_9_mlp_c_fc_weight, adam_b1, b29, b344, b505, adam_b2, b31); | |
r2_3072_256(adam_m_h_9_mlp_c_fc_bias, adam_v_h_9_mlp_c_fc_bias, adam_b1, b29, b505, adam_b2, b31); | |
E_2359296(h_9_mlp_c_proj_weight, adam_lr, adam_m_h_9_mlp_c_proj_weight, b33, adam_v_h_9_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b510, h_9_mlp_c_fc_weight, b505); | |
E_3072(h_9_mlp_c_fc_bias, adam_lr, adam_m_h_9_mlp_c_fc_bias, b33, adam_v_h_9_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_9_ln_2_weight, adam_v_h_9_ln_2_weight, adam_b1, b29, b338, b341, b343, b510, adam_b2, b31); | |
r2_768_256n1(adam_m_h_9_ln_2_bias, adam_v_h_9_ln_2_bias, adam_b1, b29, b510, adam_b2, b31); | |
r_256_768n2(b355, b338, b341, h_9_ln_2_weight, b510, b343, b342); | |
E_2359296n1(h_9_mlp_c_fc_weight, adam_lr, adam_m_h_9_mlp_c_fc_weight, b33, adam_v_h_9_mlp_c_fc_weight, b34); | |
E_768(h_9_ln_2_bias, adam_lr, adam_m_h_9_ln_2_bias, b33, adam_v_h_9_ln_2_bias, b34); | |
r_256_768n3(b342, b343, h_9_ln_2_weight, b510, b338, b341, b355); | |
E_256_768n2(b515, b500, b343, h_9_ln_2_weight, b510, b338, b341, b355, b342); | |
r2_768_768_4_64(adam_m_h_9_attn_c_proj_weight, adam_v_h_9_attn_c_proj_weight, adam_b1, b29, b337, b515, adam_b2, b31); | |
r2_768_256n1(adam_m_h_9_attn_c_proj_bias, adam_v_h_9_attn_c_proj_bias, adam_b1, b29, b515, adam_b2, b31); | |
r_256_768_768(b363, h_9_attn_c_proj_weight, b515); | |
E_768(h_9_ln_2_weight, adam_lr, adam_m_h_9_ln_2_weight, b33, adam_v_h_9_ln_2_weight, b34); | |
E_768(h_9_attn_c_proj_bias, adam_lr, adam_m_h_9_attn_c_proj_bias, b33, adam_v_h_9_attn_c_proj_bias, b34); | |
E_589824(h_9_attn_c_proj_weight, adam_lr, adam_m_h_9_attn_c_proj_weight, b33, adam_v_h_9_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b367, b336, b363); | |
r_4_12_64_64_64n3(b336, b329, b363); | |
r_3072_64n3(b397, b336, b332, b333, b334); | |
r_3072_64n4(b366, b332, b333, b336, b334, b397); | |
E_48_64_64(b363, b21, b332, b333, b336, b334, b397, b335, b366); | |
r_4_12_64_64_64n4(b336, b329, b363); | |
r_4_12_64_64_64n5(b332, b329, b363); | |
E_4_64_2304(b520, b367, b336, b332); | |
r2_2304_768_256(adam_m_h_9_attn_c_attn_weight, adam_v_h_9_attn_c_attn_weight, adam_b1, b29, b326, b520, adam_b2, b31); | |
r2_2304_256(adam_m_h_9_attn_c_attn_bias, adam_v_h_9_attn_c_attn_bias, adam_b1, b29, b520, adam_b2, b31); | |
r_256_768_2304(b525, h_9_attn_c_attn_weight, b520); | |
E_2304(h_9_attn_c_attn_bias, adam_lr, adam_m_h_9_attn_c_attn_bias, b33, adam_v_h_9_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_9_ln_1_weight, adam_v_h_9_ln_1_weight, adam_b1, b29, b320, b323, b325, b525, adam_b2, b31); | |
r2_768_256n1(adam_m_h_9_ln_1_bias, adam_v_h_9_ln_1_bias, adam_b1, b29, b525, adam_b2, b31); | |
r_256_768n2(b342, b320, b323, h_9_ln_1_weight, b525, b325, b324); | |
E_1769472(h_9_attn_c_attn_weight, adam_lr, adam_m_h_9_attn_c_attn_weight, b33, adam_v_h_9_attn_c_attn_weight, b34); | |
E_768(h_9_ln_1_bias, adam_lr, adam_m_h_9_ln_1_bias, b33, adam_v_h_9_ln_1_bias, b34); | |
r_256_768n3(b324, b325, h_9_ln_1_weight, b525, b320, b323, b342); | |
E_256_768n2(b530, b515, b325, h_9_ln_1_weight, b525, b320, b323, b342, b324); | |
r2_768_3072_256(adam_m_h_8_mlp_c_proj_weight, adam_v_h_8_mlp_c_proj_weight, adam_b1, b29, b319, b530, adam_b2, b31); | |
r2_768_256n1(adam_m_h_8_mlp_c_proj_bias, adam_v_h_8_mlp_c_proj_bias, adam_b1, b29, b530, adam_b2, b31); | |
r_256_3072_768n1(b535, b316, h_8_mlp_c_proj_weight, b530); | |
E_768(h_9_ln_1_weight, adam_lr, adam_m_h_9_ln_1_weight, b33, adam_v_h_9_ln_1_weight, b34); | |
E_768(h_8_mlp_c_proj_bias, adam_lr, adam_m_h_8_mlp_c_proj_bias, b33, adam_v_h_8_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_8_mlp_c_fc_weight, adam_v_h_8_mlp_c_fc_weight, adam_b1, b29, b313, b535, adam_b2, b31); | |
r2_3072_256(adam_m_h_8_mlp_c_fc_bias, adam_v_h_8_mlp_c_fc_bias, adam_b1, b29, b535, adam_b2, b31); | |
E_2359296(h_8_mlp_c_proj_weight, adam_lr, adam_m_h_8_mlp_c_proj_weight, b33, adam_v_h_8_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b540, h_8_mlp_c_fc_weight, b535); | |
E_3072(h_8_mlp_c_fc_bias, adam_lr, adam_m_h_8_mlp_c_fc_bias, b33, adam_v_h_8_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_8_ln_2_weight, adam_v_h_8_ln_2_weight, adam_b1, b29, b307, b310, b312, b540, adam_b2, b31); | |
r2_768_256n1(adam_m_h_8_ln_2_bias, adam_v_h_8_ln_2_bias, adam_b1, b29, b540, adam_b2, b31); | |
r_256_768n2(b324, b307, b310, h_8_ln_2_weight, b540, b312, b311); | |
E_2359296n1(h_8_mlp_c_fc_weight, adam_lr, adam_m_h_8_mlp_c_fc_weight, b33, adam_v_h_8_mlp_c_fc_weight, b34); | |
E_768(h_8_ln_2_bias, adam_lr, adam_m_h_8_ln_2_bias, b33, adam_v_h_8_ln_2_bias, b34); | |
r_256_768n3(b311, b312, h_8_ln_2_weight, b540, b307, b310, b324); | |
E_256_768n2(b545, b530, b312, h_8_ln_2_weight, b540, b307, b310, b324, b311); | |
r2_768_768_4_64(adam_m_h_8_attn_c_proj_weight, adam_v_h_8_attn_c_proj_weight, adam_b1, b29, b306, b545, adam_b2, b31); | |
r2_768_256n1(adam_m_h_8_attn_c_proj_bias, adam_v_h_8_attn_c_proj_bias, adam_b1, b29, b545, adam_b2, b31); | |
r_256_768_768(b332, h_8_attn_c_proj_weight, b545); | |
E_768(h_8_ln_2_weight, adam_lr, adam_m_h_8_ln_2_weight, b33, adam_v_h_8_ln_2_weight, b34); | |
E_768(h_8_attn_c_proj_bias, adam_lr, adam_m_h_8_attn_c_proj_bias, b33, adam_v_h_8_attn_c_proj_bias, b34); | |
E_589824(h_8_attn_c_proj_weight, adam_lr, adam_m_h_8_attn_c_proj_weight, b33, adam_v_h_8_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b336, b305, b332); | |
r_4_12_64_64_64n3(b305, b298, b332); | |
r_3072_64n3(b366, b305, b301, b302, b303); | |
r_3072_64n4(b335, b301, b302, b305, b303, b366); | |
E_48_64_64(b332, b19, b301, b302, b305, b303, b366, b304, b335); | |
r_4_12_64_64_64n4(b305, b298, b332); | |
r_4_12_64_64_64n5(b301, b298, b332); | |
E_4_64_2304(b550, b336, b305, b301); | |
r2_2304_768_256(adam_m_h_8_attn_c_attn_weight, adam_v_h_8_attn_c_attn_weight, adam_b1, b29, b295, b550, adam_b2, b31); | |
r2_2304_256(adam_m_h_8_attn_c_attn_bias, adam_v_h_8_attn_c_attn_bias, adam_b1, b29, b550, adam_b2, b31); | |
r_256_768_2304(b555, h_8_attn_c_attn_weight, b550); | |
E_2304(h_8_attn_c_attn_bias, adam_lr, adam_m_h_8_attn_c_attn_bias, b33, adam_v_h_8_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_8_ln_1_weight, adam_v_h_8_ln_1_weight, adam_b1, b29, b289, b292, b294, b555, adam_b2, b31); | |
r2_768_256n1(adam_m_h_8_ln_1_bias, adam_v_h_8_ln_1_bias, adam_b1, b29, b555, adam_b2, b31); | |
r_256_768n2(b311, b289, b292, h_8_ln_1_weight, b555, b294, b293); | |
E_1769472(h_8_attn_c_attn_weight, adam_lr, adam_m_h_8_attn_c_attn_weight, b33, adam_v_h_8_attn_c_attn_weight, b34); | |
E_768(h_8_ln_1_bias, adam_lr, adam_m_h_8_ln_1_bias, b33, adam_v_h_8_ln_1_bias, b34); | |
r_256_768n3(b293, b294, h_8_ln_1_weight, b555, b289, b292, b311); | |
E_256_768n2(b560, b545, b294, h_8_ln_1_weight, b555, b289, b292, b311, b293); | |
r2_768_3072_256(adam_m_h_7_mlp_c_proj_weight, adam_v_h_7_mlp_c_proj_weight, adam_b1, b29, b288, b560, adam_b2, b31); | |
r2_768_256n1(adam_m_h_7_mlp_c_proj_bias, adam_v_h_7_mlp_c_proj_bias, adam_b1, b29, b560, adam_b2, b31); | |
r_256_3072_768n1(b565, b285, h_7_mlp_c_proj_weight, b560); | |
E_768(h_8_ln_1_weight, adam_lr, adam_m_h_8_ln_1_weight, b33, adam_v_h_8_ln_1_weight, b34); | |
E_768(h_7_mlp_c_proj_bias, adam_lr, adam_m_h_7_mlp_c_proj_bias, b33, adam_v_h_7_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_7_mlp_c_fc_weight, adam_v_h_7_mlp_c_fc_weight, adam_b1, b29, b282, b565, adam_b2, b31); | |
r2_3072_256(adam_m_h_7_mlp_c_fc_bias, adam_v_h_7_mlp_c_fc_bias, adam_b1, b29, b565, adam_b2, b31); | |
E_2359296(h_7_mlp_c_proj_weight, adam_lr, adam_m_h_7_mlp_c_proj_weight, b33, adam_v_h_7_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b570, h_7_mlp_c_fc_weight, b565); | |
E_3072(h_7_mlp_c_fc_bias, adam_lr, adam_m_h_7_mlp_c_fc_bias, b33, adam_v_h_7_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_7_ln_2_weight, adam_v_h_7_ln_2_weight, adam_b1, b29, b276, b279, b281, b570, adam_b2, b31); | |
r2_768_256n1(adam_m_h_7_ln_2_bias, adam_v_h_7_ln_2_bias, adam_b1, b29, b570, adam_b2, b31); | |
r_256_768n2(b293, b276, b279, h_7_ln_2_weight, b570, b281, b280); | |
E_2359296n1(h_7_mlp_c_fc_weight, adam_lr, adam_m_h_7_mlp_c_fc_weight, b33, adam_v_h_7_mlp_c_fc_weight, b34); | |
E_768(h_7_ln_2_bias, adam_lr, adam_m_h_7_ln_2_bias, b33, adam_v_h_7_ln_2_bias, b34); | |
r_256_768n3(b280, b281, h_7_ln_2_weight, b570, b276, b279, b293); | |
E_256_768n2(b575, b560, b281, h_7_ln_2_weight, b570, b276, b279, b293, b280); | |
r2_768_768_4_64(adam_m_h_7_attn_c_proj_weight, adam_v_h_7_attn_c_proj_weight, adam_b1, b29, b275, b575, adam_b2, b31); | |
r2_768_256n1(adam_m_h_7_attn_c_proj_bias, adam_v_h_7_attn_c_proj_bias, adam_b1, b29, b575, adam_b2, b31); | |
r_256_768_768(b301, h_7_attn_c_proj_weight, b575); | |
E_768(h_7_ln_2_weight, adam_lr, adam_m_h_7_ln_2_weight, b33, adam_v_h_7_ln_2_weight, b34); | |
E_768(h_7_attn_c_proj_bias, adam_lr, adam_m_h_7_attn_c_proj_bias, b33, adam_v_h_7_attn_c_proj_bias, b34); | |
E_589824(h_7_attn_c_proj_weight, adam_lr, adam_m_h_7_attn_c_proj_weight, b33, adam_v_h_7_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b305, b274, b301); | |
r_4_12_64_64_64n3(b274, b267, b301); | |
r_3072_64n3(b335, b274, b270, b271, b272); | |
r_3072_64n4(b304, b270, b271, b274, b272, b335); | |
E_48_64_64(b301, b17, b270, b271, b274, b272, b335, b273, b304); | |
r_4_12_64_64_64n4(b274, b267, b301); | |
r_4_12_64_64_64n5(b270, b267, b301); | |
E_4_64_2304(b580, b305, b274, b270); | |
r2_2304_768_256(adam_m_h_7_attn_c_attn_weight, adam_v_h_7_attn_c_attn_weight, adam_b1, b29, b264, b580, adam_b2, b31); | |
r2_2304_256(adam_m_h_7_attn_c_attn_bias, adam_v_h_7_attn_c_attn_bias, adam_b1, b29, b580, adam_b2, b31); | |
r_256_768_2304(b585, h_7_attn_c_attn_weight, b580); | |
E_2304(h_7_attn_c_attn_bias, adam_lr, adam_m_h_7_attn_c_attn_bias, b33, adam_v_h_7_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_7_ln_1_weight, adam_v_h_7_ln_1_weight, adam_b1, b29, b258, b261, b263, b585, adam_b2, b31); | |
r2_768_256n1(adam_m_h_7_ln_1_bias, adam_v_h_7_ln_1_bias, adam_b1, b29, b585, adam_b2, b31); | |
r_256_768n2(b280, b258, b261, h_7_ln_1_weight, b585, b263, b262); | |
E_1769472(h_7_attn_c_attn_weight, adam_lr, adam_m_h_7_attn_c_attn_weight, b33, adam_v_h_7_attn_c_attn_weight, b34); | |
E_768(h_7_ln_1_bias, adam_lr, adam_m_h_7_ln_1_bias, b33, adam_v_h_7_ln_1_bias, b34); | |
r_256_768n3(b262, b263, h_7_ln_1_weight, b585, b258, b261, b280); | |
E_256_768n2(b590, b575, b263, h_7_ln_1_weight, b585, b258, b261, b280, b262); | |
r2_768_3072_256(adam_m_h_6_mlp_c_proj_weight, adam_v_h_6_mlp_c_proj_weight, adam_b1, b29, b257, b590, adam_b2, b31); | |
r2_768_256n1(adam_m_h_6_mlp_c_proj_bias, adam_v_h_6_mlp_c_proj_bias, adam_b1, b29, b590, adam_b2, b31); | |
r_256_3072_768n1(b595, b254, h_6_mlp_c_proj_weight, b590); | |
E_768(h_7_ln_1_weight, adam_lr, adam_m_h_7_ln_1_weight, b33, adam_v_h_7_ln_1_weight, b34); | |
E_768(h_6_mlp_c_proj_bias, adam_lr, adam_m_h_6_mlp_c_proj_bias, b33, adam_v_h_6_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_6_mlp_c_fc_weight, adam_v_h_6_mlp_c_fc_weight, adam_b1, b29, b251, b595, adam_b2, b31); | |
r2_3072_256(adam_m_h_6_mlp_c_fc_bias, adam_v_h_6_mlp_c_fc_bias, adam_b1, b29, b595, adam_b2, b31); | |
E_2359296(h_6_mlp_c_proj_weight, adam_lr, adam_m_h_6_mlp_c_proj_weight, b33, adam_v_h_6_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b600, h_6_mlp_c_fc_weight, b595); | |
E_3072(h_6_mlp_c_fc_bias, adam_lr, adam_m_h_6_mlp_c_fc_bias, b33, adam_v_h_6_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_6_ln_2_weight, adam_v_h_6_ln_2_weight, adam_b1, b29, b245, b248, b250, b600, adam_b2, b31); | |
r2_768_256n1(adam_m_h_6_ln_2_bias, adam_v_h_6_ln_2_bias, adam_b1, b29, b600, adam_b2, b31); | |
r_256_768n2(b262, b245, b248, h_6_ln_2_weight, b600, b250, b249); | |
E_2359296n1(h_6_mlp_c_fc_weight, adam_lr, adam_m_h_6_mlp_c_fc_weight, b33, adam_v_h_6_mlp_c_fc_weight, b34); | |
E_768(h_6_ln_2_bias, adam_lr, adam_m_h_6_ln_2_bias, b33, adam_v_h_6_ln_2_bias, b34); | |
r_256_768n3(b249, b250, h_6_ln_2_weight, b600, b245, b248, b262); | |
E_256_768n2(b605, b590, b250, h_6_ln_2_weight, b600, b245, b248, b262, b249); | |
r2_768_768_4_64(adam_m_h_6_attn_c_proj_weight, adam_v_h_6_attn_c_proj_weight, adam_b1, b29, b244, b605, adam_b2, b31); | |
r2_768_256n1(adam_m_h_6_attn_c_proj_bias, adam_v_h_6_attn_c_proj_bias, adam_b1, b29, b605, adam_b2, b31); | |
r_256_768_768(b270, h_6_attn_c_proj_weight, b605); | |
E_768(h_6_ln_2_weight, adam_lr, adam_m_h_6_ln_2_weight, b33, adam_v_h_6_ln_2_weight, b34); | |
E_768(h_6_attn_c_proj_bias, adam_lr, adam_m_h_6_attn_c_proj_bias, b33, adam_v_h_6_attn_c_proj_bias, b34); | |
E_589824(h_6_attn_c_proj_weight, adam_lr, adam_m_h_6_attn_c_proj_weight, b33, adam_v_h_6_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b274, b243, b270); | |
r_4_12_64_64_64n3(b243, b236, b270); | |
r_3072_64n3(b304, b243, b239, b240, b241); | |
r_3072_64n4(b273, b239, b240, b243, b241, b304); | |
E_48_64_64(b270, b15, b239, b240, b243, b241, b304, b242, b273); | |
r_4_12_64_64_64n4(b243, b236, b270); | |
r_4_12_64_64_64n5(b239, b236, b270); | |
E_4_64_2304(b610, b274, b243, b239); | |
r2_2304_768_256(adam_m_h_6_attn_c_attn_weight, adam_v_h_6_attn_c_attn_weight, adam_b1, b29, b233, b610, adam_b2, b31); | |
r2_2304_256(adam_m_h_6_attn_c_attn_bias, adam_v_h_6_attn_c_attn_bias, adam_b1, b29, b610, adam_b2, b31); | |
r_256_768_2304(b615, h_6_attn_c_attn_weight, b610); | |
E_2304(h_6_attn_c_attn_bias, adam_lr, adam_m_h_6_attn_c_attn_bias, b33, adam_v_h_6_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_6_ln_1_weight, adam_v_h_6_ln_1_weight, adam_b1, b29, b227, b230, b232, b615, adam_b2, b31); | |
r2_768_256n1(adam_m_h_6_ln_1_bias, adam_v_h_6_ln_1_bias, adam_b1, b29, b615, adam_b2, b31); | |
r_256_768n2(b249, b227, b230, h_6_ln_1_weight, b615, b232, b231); | |
E_1769472(h_6_attn_c_attn_weight, adam_lr, adam_m_h_6_attn_c_attn_weight, b33, adam_v_h_6_attn_c_attn_weight, b34); | |
E_768(h_6_ln_1_bias, adam_lr, adam_m_h_6_ln_1_bias, b33, adam_v_h_6_ln_1_bias, b34); | |
r_256_768n3(b231, b232, h_6_ln_1_weight, b615, b227, b230, b249); | |
E_256_768n2(b620, b605, b232, h_6_ln_1_weight, b615, b227, b230, b249, b231); | |
r2_768_3072_256(adam_m_h_5_mlp_c_proj_weight, adam_v_h_5_mlp_c_proj_weight, adam_b1, b29, b226, b620, adam_b2, b31); | |
r2_768_256n1(adam_m_h_5_mlp_c_proj_bias, adam_v_h_5_mlp_c_proj_bias, adam_b1, b29, b620, adam_b2, b31); | |
r_256_3072_768n1(b625, b223, h_5_mlp_c_proj_weight, b620); | |
E_768(h_6_ln_1_weight, adam_lr, adam_m_h_6_ln_1_weight, b33, adam_v_h_6_ln_1_weight, b34); | |
E_768(h_5_mlp_c_proj_bias, adam_lr, adam_m_h_5_mlp_c_proj_bias, b33, adam_v_h_5_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_5_mlp_c_fc_weight, adam_v_h_5_mlp_c_fc_weight, adam_b1, b29, b220, b625, adam_b2, b31); | |
r2_3072_256(adam_m_h_5_mlp_c_fc_bias, adam_v_h_5_mlp_c_fc_bias, adam_b1, b29, b625, adam_b2, b31); | |
E_2359296(h_5_mlp_c_proj_weight, adam_lr, adam_m_h_5_mlp_c_proj_weight, b33, adam_v_h_5_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b630, h_5_mlp_c_fc_weight, b625); | |
E_3072(h_5_mlp_c_fc_bias, adam_lr, adam_m_h_5_mlp_c_fc_bias, b33, adam_v_h_5_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_5_ln_2_weight, adam_v_h_5_ln_2_weight, adam_b1, b29, b214, b217, b219, b630, adam_b2, b31); | |
r2_768_256n1(adam_m_h_5_ln_2_bias, adam_v_h_5_ln_2_bias, adam_b1, b29, b630, adam_b2, b31); | |
r_256_768n2(b231, b214, b217, h_5_ln_2_weight, b630, b219, b218); | |
E_2359296n1(h_5_mlp_c_fc_weight, adam_lr, adam_m_h_5_mlp_c_fc_weight, b33, adam_v_h_5_mlp_c_fc_weight, b34); | |
E_768(h_5_ln_2_bias, adam_lr, adam_m_h_5_ln_2_bias, b33, adam_v_h_5_ln_2_bias, b34); | |
r_256_768n3(b218, b219, h_5_ln_2_weight, b630, b214, b217, b231); | |
E_256_768n2(b635, b620, b219, h_5_ln_2_weight, b630, b214, b217, b231, b218); | |
r2_768_768_4_64(adam_m_h_5_attn_c_proj_weight, adam_v_h_5_attn_c_proj_weight, adam_b1, b29, b213, b635, adam_b2, b31); | |
r2_768_256n1(adam_m_h_5_attn_c_proj_bias, adam_v_h_5_attn_c_proj_bias, adam_b1, b29, b635, adam_b2, b31); | |
r_256_768_768(b239, h_5_attn_c_proj_weight, b635); | |
E_768(h_5_ln_2_weight, adam_lr, adam_m_h_5_ln_2_weight, b33, adam_v_h_5_ln_2_weight, b34); | |
E_768(h_5_attn_c_proj_bias, adam_lr, adam_m_h_5_attn_c_proj_bias, b33, adam_v_h_5_attn_c_proj_bias, b34); | |
E_589824(h_5_attn_c_proj_weight, adam_lr, adam_m_h_5_attn_c_proj_weight, b33, adam_v_h_5_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b243, b212, b239); | |
r_4_12_64_64_64n3(b212, b205, b239); | |
r_3072_64n3(b273, b212, b208, b209, b210); | |
r_3072_64n4(b242, b208, b209, b212, b210, b273); | |
E_48_64_64(b239, b13, b208, b209, b212, b210, b273, b211, b242); | |
r_4_12_64_64_64n4(b212, b205, b239); | |
r_4_12_64_64_64n5(b208, b205, b239); | |
E_4_64_2304(b640, b243, b212, b208); | |
r2_2304_768_256(adam_m_h_5_attn_c_attn_weight, adam_v_h_5_attn_c_attn_weight, adam_b1, b29, b202, b640, adam_b2, b31); | |
r2_2304_256(adam_m_h_5_attn_c_attn_bias, adam_v_h_5_attn_c_attn_bias, adam_b1, b29, b640, adam_b2, b31); | |
r_256_768_2304(b645, h_5_attn_c_attn_weight, b640); | |
E_2304(h_5_attn_c_attn_bias, adam_lr, adam_m_h_5_attn_c_attn_bias, b33, adam_v_h_5_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_5_ln_1_weight, adam_v_h_5_ln_1_weight, adam_b1, b29, b196, b199, b201, b645, adam_b2, b31); | |
r2_768_256n1(adam_m_h_5_ln_1_bias, adam_v_h_5_ln_1_bias, adam_b1, b29, b645, adam_b2, b31); | |
r_256_768n2(b218, b196, b199, h_5_ln_1_weight, b645, b201, b200); | |
E_1769472(h_5_attn_c_attn_weight, adam_lr, adam_m_h_5_attn_c_attn_weight, b33, adam_v_h_5_attn_c_attn_weight, b34); | |
E_768(h_5_ln_1_bias, adam_lr, adam_m_h_5_ln_1_bias, b33, adam_v_h_5_ln_1_bias, b34); | |
r_256_768n3(b200, b201, h_5_ln_1_weight, b645, b196, b199, b218); | |
E_256_768n2(b650, b635, b201, h_5_ln_1_weight, b645, b196, b199, b218, b200); | |
r2_768_3072_256(adam_m_h_4_mlp_c_proj_weight, adam_v_h_4_mlp_c_proj_weight, adam_b1, b29, b195, b650, adam_b2, b31); | |
r2_768_256n1(adam_m_h_4_mlp_c_proj_bias, adam_v_h_4_mlp_c_proj_bias, adam_b1, b29, b650, adam_b2, b31); | |
r_256_3072_768n1(b655, b192, h_4_mlp_c_proj_weight, b650); | |
E_768(h_5_ln_1_weight, adam_lr, adam_m_h_5_ln_1_weight, b33, adam_v_h_5_ln_1_weight, b34); | |
E_768(h_4_mlp_c_proj_bias, adam_lr, adam_m_h_4_mlp_c_proj_bias, b33, adam_v_h_4_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_4_mlp_c_fc_weight, adam_v_h_4_mlp_c_fc_weight, adam_b1, b29, b189, b655, adam_b2, b31); | |
r2_3072_256(adam_m_h_4_mlp_c_fc_bias, adam_v_h_4_mlp_c_fc_bias, adam_b1, b29, b655, adam_b2, b31); | |
E_2359296(h_4_mlp_c_proj_weight, adam_lr, adam_m_h_4_mlp_c_proj_weight, b33, adam_v_h_4_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b660, h_4_mlp_c_fc_weight, b655); | |
E_3072(h_4_mlp_c_fc_bias, adam_lr, adam_m_h_4_mlp_c_fc_bias, b33, adam_v_h_4_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_4_ln_2_weight, adam_v_h_4_ln_2_weight, adam_b1, b29, b183, b186, b188, b660, adam_b2, b31); | |
r2_768_256n1(adam_m_h_4_ln_2_bias, adam_v_h_4_ln_2_bias, adam_b1, b29, b660, adam_b2, b31); | |
r_256_768n2(b200, b183, b186, h_4_ln_2_weight, b660, b188, b187); | |
E_2359296n1(h_4_mlp_c_fc_weight, adam_lr, adam_m_h_4_mlp_c_fc_weight, b33, adam_v_h_4_mlp_c_fc_weight, b34); | |
E_768(h_4_ln_2_bias, adam_lr, adam_m_h_4_ln_2_bias, b33, adam_v_h_4_ln_2_bias, b34); | |
r_256_768n3(b187, b188, h_4_ln_2_weight, b660, b183, b186, b200); | |
E_256_768n2(b665, b650, b188, h_4_ln_2_weight, b660, b183, b186, b200, b187); | |
r2_768_768_4_64(adam_m_h_4_attn_c_proj_weight, adam_v_h_4_attn_c_proj_weight, adam_b1, b29, b182, b665, adam_b2, b31); | |
r2_768_256n1(adam_m_h_4_attn_c_proj_bias, adam_v_h_4_attn_c_proj_bias, adam_b1, b29, b665, adam_b2, b31); | |
r_256_768_768(b208, h_4_attn_c_proj_weight, b665); | |
E_768(h_4_ln_2_weight, adam_lr, adam_m_h_4_ln_2_weight, b33, adam_v_h_4_ln_2_weight, b34); | |
E_768(h_4_attn_c_proj_bias, adam_lr, adam_m_h_4_attn_c_proj_bias, b33, adam_v_h_4_attn_c_proj_bias, b34); | |
E_589824(h_4_attn_c_proj_weight, adam_lr, adam_m_h_4_attn_c_proj_weight, b33, adam_v_h_4_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b212, b181, b208); | |
r_4_12_64_64_64n3(b181, b174, b208); | |
r_3072_64n3(b242, b181, b177, b178, b179); | |
r_3072_64n4(b211, b177, b178, b181, b179, b242); | |
E_48_64_64(b208, b11, b177, b178, b181, b179, b242, b180, b211); | |
r_4_12_64_64_64n4(b181, b174, b208); | |
r_4_12_64_64_64n5(b177, b174, b208); | |
E_4_64_2304(b670, b212, b181, b177); | |
r2_2304_768_256(adam_m_h_4_attn_c_attn_weight, adam_v_h_4_attn_c_attn_weight, adam_b1, b29, b171, b670, adam_b2, b31); | |
r2_2304_256(adam_m_h_4_attn_c_attn_bias, adam_v_h_4_attn_c_attn_bias, adam_b1, b29, b670, adam_b2, b31); | |
r_256_768_2304(b675, h_4_attn_c_attn_weight, b670); | |
E_2304(h_4_attn_c_attn_bias, adam_lr, adam_m_h_4_attn_c_attn_bias, b33, adam_v_h_4_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_4_ln_1_weight, adam_v_h_4_ln_1_weight, adam_b1, b29, b165, b168, b170, b675, adam_b2, b31); | |
r2_768_256n1(adam_m_h_4_ln_1_bias, adam_v_h_4_ln_1_bias, adam_b1, b29, b675, adam_b2, b31); | |
r_256_768n2(b187, b165, b168, h_4_ln_1_weight, b675, b170, b169); | |
E_1769472(h_4_attn_c_attn_weight, adam_lr, adam_m_h_4_attn_c_attn_weight, b33, adam_v_h_4_attn_c_attn_weight, b34); | |
E_768(h_4_ln_1_bias, adam_lr, adam_m_h_4_ln_1_bias, b33, adam_v_h_4_ln_1_bias, b34); | |
r_256_768n3(b169, b170, h_4_ln_1_weight, b675, b165, b168, b187); | |
E_256_768n2(b680, b665, b170, h_4_ln_1_weight, b675, b165, b168, b187, b169); | |
r2_768_3072_256(adam_m_h_3_mlp_c_proj_weight, adam_v_h_3_mlp_c_proj_weight, adam_b1, b29, b164, b680, adam_b2, b31); | |
r2_768_256n1(adam_m_h_3_mlp_c_proj_bias, adam_v_h_3_mlp_c_proj_bias, adam_b1, b29, b680, adam_b2, b31); | |
r_256_3072_768n1(b685, b161, h_3_mlp_c_proj_weight, b680); | |
E_768(h_4_ln_1_weight, adam_lr, adam_m_h_4_ln_1_weight, b33, adam_v_h_4_ln_1_weight, b34); | |
E_768(h_3_mlp_c_proj_bias, adam_lr, adam_m_h_3_mlp_c_proj_bias, b33, adam_v_h_3_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_3_mlp_c_fc_weight, adam_v_h_3_mlp_c_fc_weight, adam_b1, b29, b158, b685, adam_b2, b31); | |
r2_3072_256(adam_m_h_3_mlp_c_fc_bias, adam_v_h_3_mlp_c_fc_bias, adam_b1, b29, b685, adam_b2, b31); | |
E_2359296(h_3_mlp_c_proj_weight, adam_lr, adam_m_h_3_mlp_c_proj_weight, b33, adam_v_h_3_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b690, h_3_mlp_c_fc_weight, b685); | |
E_3072(h_3_mlp_c_fc_bias, adam_lr, adam_m_h_3_mlp_c_fc_bias, b33, adam_v_h_3_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_3_ln_2_weight, adam_v_h_3_ln_2_weight, adam_b1, b29, b152, b155, b157, b690, adam_b2, b31); | |
r2_768_256n1(adam_m_h_3_ln_2_bias, adam_v_h_3_ln_2_bias, adam_b1, b29, b690, adam_b2, b31); | |
r_256_768n2(b169, b152, b155, h_3_ln_2_weight, b690, b157, b156); | |
E_2359296n1(h_3_mlp_c_fc_weight, adam_lr, adam_m_h_3_mlp_c_fc_weight, b33, adam_v_h_3_mlp_c_fc_weight, b34); | |
E_768(h_3_ln_2_bias, adam_lr, adam_m_h_3_ln_2_bias, b33, adam_v_h_3_ln_2_bias, b34); | |
r_256_768n3(b156, b157, h_3_ln_2_weight, b690, b152, b155, b169); | |
E_256_768n2(b695, b680, b157, h_3_ln_2_weight, b690, b152, b155, b169, b156); | |
r2_768_768_4_64(adam_m_h_3_attn_c_proj_weight, adam_v_h_3_attn_c_proj_weight, adam_b1, b29, b151, b695, adam_b2, b31); | |
r2_768_256n1(adam_m_h_3_attn_c_proj_bias, adam_v_h_3_attn_c_proj_bias, adam_b1, b29, b695, adam_b2, b31); | |
r_256_768_768(b177, h_3_attn_c_proj_weight, b695); | |
E_768(h_3_ln_2_weight, adam_lr, adam_m_h_3_ln_2_weight, b33, adam_v_h_3_ln_2_weight, b34); | |
E_768(h_3_attn_c_proj_bias, adam_lr, adam_m_h_3_attn_c_proj_bias, b33, adam_v_h_3_attn_c_proj_bias, b34); | |
E_589824(h_3_attn_c_proj_weight, adam_lr, adam_m_h_3_attn_c_proj_weight, b33, adam_v_h_3_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b181, b150, b177); | |
r_4_12_64_64_64n3(b150, b143, b177); | |
r_3072_64n3(b211, b150, b146, b147, b148); | |
r_3072_64n4(b180, b146, b147, b150, b148, b211); | |
E_48_64_64(b177, b9, b146, b147, b150, b148, b211, b149, b180); | |
r_4_12_64_64_64n4(b150, b143, b177); | |
r_4_12_64_64_64n5(b146, b143, b177); | |
E_4_64_2304(b700, b181, b150, b146); | |
r2_2304_768_256(adam_m_h_3_attn_c_attn_weight, adam_v_h_3_attn_c_attn_weight, adam_b1, b29, b140, b700, adam_b2, b31); | |
r2_2304_256(adam_m_h_3_attn_c_attn_bias, adam_v_h_3_attn_c_attn_bias, adam_b1, b29, b700, adam_b2, b31); | |
r_256_768_2304(b705, h_3_attn_c_attn_weight, b700); | |
E_2304(h_3_attn_c_attn_bias, adam_lr, adam_m_h_3_attn_c_attn_bias, b33, adam_v_h_3_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_3_ln_1_weight, adam_v_h_3_ln_1_weight, adam_b1, b29, b134, b137, b139, b705, adam_b2, b31); | |
r2_768_256n1(adam_m_h_3_ln_1_bias, adam_v_h_3_ln_1_bias, adam_b1, b29, b705, adam_b2, b31); | |
r_256_768n2(b156, b134, b137, h_3_ln_1_weight, b705, b139, b138); | |
E_1769472(h_3_attn_c_attn_weight, adam_lr, adam_m_h_3_attn_c_attn_weight, b33, adam_v_h_3_attn_c_attn_weight, b34); | |
E_768(h_3_ln_1_bias, adam_lr, adam_m_h_3_ln_1_bias, b33, adam_v_h_3_ln_1_bias, b34); | |
r_256_768n3(b138, b139, h_3_ln_1_weight, b705, b134, b137, b156); | |
E_256_768n2(b710, b695, b139, h_3_ln_1_weight, b705, b134, b137, b156, b138); | |
r2_768_3072_256(adam_m_h_2_mlp_c_proj_weight, adam_v_h_2_mlp_c_proj_weight, adam_b1, b29, b133, b710, adam_b2, b31); | |
r2_768_256n1(adam_m_h_2_mlp_c_proj_bias, adam_v_h_2_mlp_c_proj_bias, adam_b1, b29, b710, adam_b2, b31); | |
r_256_3072_768n1(b715, b130, h_2_mlp_c_proj_weight, b710); | |
E_768(h_3_ln_1_weight, adam_lr, adam_m_h_3_ln_1_weight, b33, adam_v_h_3_ln_1_weight, b34); | |
E_768(h_2_mlp_c_proj_bias, adam_lr, adam_m_h_2_mlp_c_proj_bias, b33, adam_v_h_2_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_2_mlp_c_fc_weight, adam_v_h_2_mlp_c_fc_weight, adam_b1, b29, b127, b715, adam_b2, b31); | |
r2_3072_256(adam_m_h_2_mlp_c_fc_bias, adam_v_h_2_mlp_c_fc_bias, adam_b1, b29, b715, adam_b2, b31); | |
E_2359296(h_2_mlp_c_proj_weight, adam_lr, adam_m_h_2_mlp_c_proj_weight, b33, adam_v_h_2_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b720, h_2_mlp_c_fc_weight, b715); | |
E_3072(h_2_mlp_c_fc_bias, adam_lr, adam_m_h_2_mlp_c_fc_bias, b33, adam_v_h_2_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_2_ln_2_weight, adam_v_h_2_ln_2_weight, adam_b1, b29, b121, b124, b126, b720, adam_b2, b31); | |
r2_768_256n1(adam_m_h_2_ln_2_bias, adam_v_h_2_ln_2_bias, adam_b1, b29, b720, adam_b2, b31); | |
r_256_768n2(b138, b121, b124, h_2_ln_2_weight, b720, b126, b125); | |
E_2359296n1(h_2_mlp_c_fc_weight, adam_lr, adam_m_h_2_mlp_c_fc_weight, b33, adam_v_h_2_mlp_c_fc_weight, b34); | |
E_768(h_2_ln_2_bias, adam_lr, adam_m_h_2_ln_2_bias, b33, adam_v_h_2_ln_2_bias, b34); | |
r_256_768n3(b125, b126, h_2_ln_2_weight, b720, b121, b124, b138); | |
E_256_768n2(b725, b710, b126, h_2_ln_2_weight, b720, b121, b124, b138, b125); | |
r2_768_768_4_64(adam_m_h_2_attn_c_proj_weight, adam_v_h_2_attn_c_proj_weight, adam_b1, b29, b120, b725, adam_b2, b31); | |
r2_768_256n1(adam_m_h_2_attn_c_proj_bias, adam_v_h_2_attn_c_proj_bias, adam_b1, b29, b725, adam_b2, b31); | |
r_256_768_768(b146, h_2_attn_c_proj_weight, b725); | |
E_768(h_2_ln_2_weight, adam_lr, adam_m_h_2_ln_2_weight, b33, adam_v_h_2_ln_2_weight, b34); | |
E_768(h_2_attn_c_proj_bias, adam_lr, adam_m_h_2_attn_c_proj_bias, b33, adam_v_h_2_attn_c_proj_bias, b34); | |
E_589824(h_2_attn_c_proj_weight, adam_lr, adam_m_h_2_attn_c_proj_weight, b33, adam_v_h_2_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b150, b119, b146); | |
r_4_12_64_64_64n3(b119, b112, b146); | |
r_3072_64n3(b180, b119, b115, b116, b117); | |
r_3072_64n4(b149, b115, b116, b119, b117, b180); | |
E_48_64_64(b146, b7, b115, b116, b119, b117, b180, b118, b149); | |
r_4_12_64_64_64n4(b119, b112, b146); | |
r_4_12_64_64_64n5(b115, b112, b146); | |
E_4_64_2304(b730, b150, b119, b115); | |
r2_2304_768_256(adam_m_h_2_attn_c_attn_weight, adam_v_h_2_attn_c_attn_weight, adam_b1, b29, b109, b730, adam_b2, b31); | |
r2_2304_256(adam_m_h_2_attn_c_attn_bias, adam_v_h_2_attn_c_attn_bias, adam_b1, b29, b730, adam_b2, b31); | |
r_256_768_2304(b735, h_2_attn_c_attn_weight, b730); | |
E_2304(h_2_attn_c_attn_bias, adam_lr, adam_m_h_2_attn_c_attn_bias, b33, adam_v_h_2_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_2_ln_1_weight, adam_v_h_2_ln_1_weight, adam_b1, b29, b103, b106, b108, b735, adam_b2, b31); | |
r2_768_256n1(adam_m_h_2_ln_1_bias, adam_v_h_2_ln_1_bias, adam_b1, b29, b735, adam_b2, b31); | |
r_256_768n2(b125, b103, b106, h_2_ln_1_weight, b735, b108, b107); | |
E_1769472(h_2_attn_c_attn_weight, adam_lr, adam_m_h_2_attn_c_attn_weight, b33, adam_v_h_2_attn_c_attn_weight, b34); | |
E_768(h_2_ln_1_bias, adam_lr, adam_m_h_2_ln_1_bias, b33, adam_v_h_2_ln_1_bias, b34); | |
r_256_768n3(b107, b108, h_2_ln_1_weight, b735, b103, b106, b125); | |
E_256_768n2(b740, b725, b108, h_2_ln_1_weight, b735, b103, b106, b125, b107); | |
r2_768_3072_256(adam_m_h_1_mlp_c_proj_weight, adam_v_h_1_mlp_c_proj_weight, adam_b1, b29, b102, b740, adam_b2, b31); | |
r2_768_256n1(adam_m_h_1_mlp_c_proj_bias, adam_v_h_1_mlp_c_proj_bias, adam_b1, b29, b740, adam_b2, b31); | |
r_256_3072_768n1(b745, b99, h_1_mlp_c_proj_weight, b740); | |
E_768(h_2_ln_1_weight, adam_lr, adam_m_h_2_ln_1_weight, b33, adam_v_h_2_ln_1_weight, b34); | |
E_768(h_1_mlp_c_proj_bias, adam_lr, adam_m_h_1_mlp_c_proj_bias, b33, adam_v_h_1_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_1_mlp_c_fc_weight, adam_v_h_1_mlp_c_fc_weight, adam_b1, b29, b96, b745, adam_b2, b31); | |
r2_3072_256(adam_m_h_1_mlp_c_fc_bias, adam_v_h_1_mlp_c_fc_bias, adam_b1, b29, b745, adam_b2, b31); | |
E_2359296(h_1_mlp_c_proj_weight, adam_lr, adam_m_h_1_mlp_c_proj_weight, b33, adam_v_h_1_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b750, h_1_mlp_c_fc_weight, b745); | |
E_3072(h_1_mlp_c_fc_bias, adam_lr, adam_m_h_1_mlp_c_fc_bias, b33, adam_v_h_1_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_1_ln_2_weight, adam_v_h_1_ln_2_weight, adam_b1, b29, b90, b93, b95, b750, adam_b2, b31); | |
r2_768_256n1(adam_m_h_1_ln_2_bias, adam_v_h_1_ln_2_bias, adam_b1, b29, b750, adam_b2, b31); | |
r_256_768n2(b107, b90, b93, h_1_ln_2_weight, b750, b95, b94); | |
E_2359296n1(h_1_mlp_c_fc_weight, adam_lr, adam_m_h_1_mlp_c_fc_weight, b33, adam_v_h_1_mlp_c_fc_weight, b34); | |
E_768(h_1_ln_2_bias, adam_lr, adam_m_h_1_ln_2_bias, b33, adam_v_h_1_ln_2_bias, b34); | |
r_256_768n3(b94, b95, h_1_ln_2_weight, b750, b90, b93, b107); | |
E_256_768n2(b755, b740, b95, h_1_ln_2_weight, b750, b90, b93, b107, b94); | |
r2_768_768_4_64(adam_m_h_1_attn_c_proj_weight, adam_v_h_1_attn_c_proj_weight, adam_b1, b29, b89, b755, adam_b2, b31); | |
r2_768_256n1(adam_m_h_1_attn_c_proj_bias, adam_v_h_1_attn_c_proj_bias, adam_b1, b29, b755, adam_b2, b31); | |
r_256_768_768(b115, h_1_attn_c_proj_weight, b755); | |
E_768(h_1_ln_2_weight, adam_lr, adam_m_h_1_ln_2_weight, b33, adam_v_h_1_ln_2_weight, b34); | |
E_768(h_1_attn_c_proj_bias, adam_lr, adam_m_h_1_attn_c_proj_bias, b33, adam_v_h_1_attn_c_proj_bias, b34); | |
E_589824(h_1_attn_c_proj_weight, adam_lr, adam_m_h_1_attn_c_proj_weight, b33, adam_v_h_1_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b119, b88, b115); | |
r_4_12_64_64_64n3(b88, b81, b115); | |
r_3072_64n3(b149, b88, b84, b85, b86); | |
r_3072_64n4(b118, b84, b85, b88, b86, b149); | |
E_48_64_64(b115, b5, b84, b85, b88, b86, b149, b87, b118); | |
r_4_12_64_64_64n4(b88, b81, b115); | |
r_4_12_64_64_64n5(b84, b81, b115); | |
E_4_64_2304(b760, b119, b88, b84); | |
r2_2304_768_256(adam_m_h_1_attn_c_attn_weight, adam_v_h_1_attn_c_attn_weight, adam_b1, b29, b78, b760, adam_b2, b31); | |
r2_2304_256(adam_m_h_1_attn_c_attn_bias, adam_v_h_1_attn_c_attn_bias, adam_b1, b29, b760, adam_b2, b31); | |
r_256_768_2304(b765, h_1_attn_c_attn_weight, b760); | |
E_2304(h_1_attn_c_attn_bias, adam_lr, adam_m_h_1_attn_c_attn_bias, b33, adam_v_h_1_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_1_ln_1_weight, adam_v_h_1_ln_1_weight, adam_b1, b29, b72, b75, b77, b765, adam_b2, b31); | |
r2_768_256n1(adam_m_h_1_ln_1_bias, adam_v_h_1_ln_1_bias, adam_b1, b29, b765, adam_b2, b31); | |
r_256_768n2(b94, b72, b75, h_1_ln_1_weight, b765, b77, b76); | |
E_1769472(h_1_attn_c_attn_weight, adam_lr, adam_m_h_1_attn_c_attn_weight, b33, adam_v_h_1_attn_c_attn_weight, b34); | |
E_768(h_1_ln_1_bias, adam_lr, adam_m_h_1_ln_1_bias, b33, adam_v_h_1_ln_1_bias, b34); | |
r_256_768n3(b76, b77, h_1_ln_1_weight, b765, b72, b75, b94); | |
E_256_768n2(b770, b755, b77, h_1_ln_1_weight, b765, b72, b75, b94, b76); | |
r2_768_3072_256(adam_m_h_0_mlp_c_proj_weight, adam_v_h_0_mlp_c_proj_weight, adam_b1, b29, b71, b770, adam_b2, b31); | |
r2_768_256n1(adam_m_h_0_mlp_c_proj_bias, adam_v_h_0_mlp_c_proj_bias, adam_b1, b29, b770, adam_b2, b31); | |
r_256_3072_768n1(b775, b68, h_0_mlp_c_proj_weight, b770); | |
E_768(h_1_ln_1_weight, adam_lr, adam_m_h_1_ln_1_weight, b33, adam_v_h_1_ln_1_weight, b34); | |
E_768(h_0_mlp_c_proj_bias, adam_lr, adam_m_h_0_mlp_c_proj_bias, b33, adam_v_h_0_mlp_c_proj_bias, b34); | |
r2_3072_768_256(adam_m_h_0_mlp_c_fc_weight, adam_v_h_0_mlp_c_fc_weight, adam_b1, b29, b65, b775, adam_b2, b31); | |
r2_3072_256(adam_m_h_0_mlp_c_fc_bias, adam_v_h_0_mlp_c_fc_bias, adam_b1, b29, b775, adam_b2, b31); | |
E_2359296(h_0_mlp_c_proj_weight, adam_lr, adam_m_h_0_mlp_c_proj_weight, b33, adam_v_h_0_mlp_c_proj_weight, b34); | |
r_256_768_3072n1(b780, h_0_mlp_c_fc_weight, b775); | |
E_3072(h_0_mlp_c_fc_bias, adam_lr, adam_m_h_0_mlp_c_fc_bias, b33, adam_v_h_0_mlp_c_fc_bias, b34); | |
r2_768_256(adam_m_h_0_ln_2_weight, adam_v_h_0_ln_2_weight, adam_b1, b29, b59, b62, b64, b780, adam_b2, b31); | |
r2_768_256n1(adam_m_h_0_ln_2_bias, adam_v_h_0_ln_2_bias, adam_b1, b29, b780, adam_b2, b31); | |
r_256_768n2(b76, b59, b62, h_0_ln_2_weight, b780, b64, b63); | |
E_2359296n1(h_0_mlp_c_fc_weight, adam_lr, adam_m_h_0_mlp_c_fc_weight, b33, adam_v_h_0_mlp_c_fc_weight, b34); | |
E_768(h_0_ln_2_bias, adam_lr, adam_m_h_0_ln_2_bias, b33, adam_v_h_0_ln_2_bias, b34); | |
r_256_768n3(b63, b64, h_0_ln_2_weight, b780, b59, b62, b76); | |
E_256_768n2(b785, b770, b64, h_0_ln_2_weight, b780, b59, b62, b76, b63); | |
r2_768_768_4_64(adam_m_h_0_attn_c_proj_weight, adam_v_h_0_attn_c_proj_weight, adam_b1, b29, b58, b785, adam_b2, b31); | |
r2_768_256n1(adam_m_h_0_attn_c_proj_bias, adam_v_h_0_attn_c_proj_bias, adam_b1, b29, b785, adam_b2, b31); | |
r_256_768_768(b84, h_0_attn_c_proj_weight, b785); | |
E_768(h_0_ln_2_weight, adam_lr, adam_m_h_0_ln_2_weight, b33, adam_v_h_0_ln_2_weight, b34); | |
E_768(h_0_attn_c_proj_bias, adam_lr, adam_m_h_0_attn_c_proj_bias, b33, adam_v_h_0_attn_c_proj_bias, b34); | |
E_589824(h_0_attn_c_proj_weight, adam_lr, adam_m_h_0_attn_c_proj_weight, b33, adam_v_h_0_attn_c_proj_weight, b34); | |
r_4_12_64_64_64n2(b88, b57, b84); | |
r_4_12_64_64_64n3(b57, b50, b84); | |
r_3072_64n3(b118, b57, b53, b54, b55); | |
r_3072_64n4(b87, b53, b54, b57, b55, b118); | |
E_48_64_64(b84, b3, b53, b54, b57, b55, b118, b56, b87); | |
r_4_12_64_64_64n4(b57, b50, b84); | |
r_4_12_64_64_64n5(b53, b50, b84); | |
E_4_64_2304(b790, b88, b57, b53); | |
r2_2304_768_256(adam_m_h_0_attn_c_attn_weight, adam_v_h_0_attn_c_attn_weight, adam_b1, b29, b47, b790, adam_b2, b31); | |
r2_2304_256(adam_m_h_0_attn_c_attn_bias, adam_v_h_0_attn_c_attn_bias, adam_b1, b29, b790, adam_b2, b31); | |
r_256_768_2304(b795, h_0_attn_c_attn_weight, b790); | |
E_2304(h_0_attn_c_attn_bias, adam_lr, adam_m_h_0_attn_c_attn_bias, b33, adam_v_h_0_attn_c_attn_bias, b34); | |
r2_768_256(adam_m_h_0_ln_1_weight, adam_v_h_0_ln_1_weight, adam_b1, b29, b40, b44, b46, b795, adam_b2, b31); | |
r2_768_256n1(adam_m_h_0_ln_1_bias, adam_v_h_0_ln_1_bias, adam_b1, b29, b795, adam_b2, b31); | |
r_256_768n2(b63, b40, b44, h_0_ln_1_weight, b795, b46, b45); | |
E_1769472(h_0_attn_c_attn_weight, adam_lr, adam_m_h_0_attn_c_attn_weight, b33, adam_v_h_0_attn_c_attn_weight, b34); | |
E_768(h_0_ln_1_bias, adam_lr, adam_m_h_0_ln_1_bias, b33, adam_v_h_0_ln_1_bias, b34); | |
r_256_768n3(b45, b46, h_0_ln_1_weight, b795, b40, b44, b63); | |
E_256_768n2(b53, b785, b46, h_0_ln_1_weight, b795, b40, b44, b63, b45); | |
E_768(h_0_ln_1_weight, adam_lr, adam_m_h_0_ln_1_weight, b33, adam_v_h_0_ln_1_weight, b34); | |
r_49152_4(b800, b53); | |
r_50257_768_256(b801, X, wte_arange, b53); | |
r2_1024_768_64(adam_m_wpe_weight, adam_v_wpe_weight, adam_b1, b29, b2, wpe_arange, b800, adam_b2, b31); | |
r_50257_768_256n1(grad_lm_head_weight, b419, b429, b801); | |
E_786432n1(wpe_weight, adam_lr, adam_m_wpe_weight, b33, adam_v_wpe_weight, b34); | |
E_38597376(adam_m_lm_head_weight, adam_b1, b29, grad_lm_head_weight); | |
E_38597376n1(adam_v_lm_head_weight, adam_b2, b31, grad_lm_head_weight); | |
E_38597376n2(lm_head_weight, adam_lr, adam_m_lm_head_weight, b33, adam_v_lm_head_weight, b34); | |
} |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.
Context: https://twitter.com/__tinygrad__/status/1783692017010389183