Skip to content

Instantly share code, notes, and snippets.

@kaushikcfd
Last active January 16, 2018 16:39
Show Gist options
  • Save kaushikcfd/8d925973632516736e9c69ae8ebfe96e to your computer and use it in GitHub Desktop.
Save kaushikcfd/8d925973632516736e9c69ae8ebfe96e to your computer and use it in GitHub Desktop.

TSFC kernel

/*
 * Accumulates one cell's contribution into the 18-entry array A.
 *
 * Entries of A touched by this function (every write is a "+=", so A is
 * never zeroed here and must hold meaningful values on entry):
 *   A[2*j + 0], A[2*j + 1]   for j = 0..5   (entries 0..11)
 *   A[12 + j]                for j = 0..2
 *   A[15 + j]                for j = 0..2
 *
 * Indices actually read from the arguments:
 *   coords[0..2][0..1]
 *   w_0[0..5][0..1], w_6[0..5][0..1]
 *   w_2[0..2][0], w_7[0..2][0], w_8[0..2][0]
 *   w_3[0], w_4[0], w_5[0..1]
 *   w_1 is accepted but never read in this body.
 *
 * Things to be aware of:
 *   - t9 (a 2x2 determinant built from coords) and w_4[0] are both used as
 *     divisors without any zero check.
 *   - fabs() is called, so <math.h> has to be in scope where this is compiled.
 *   - "#pragma coffee expression" is not a standard pragma; it is kept as-is.
 *
 * NOTE(review): the constant tables look like 7-point quadrature data on a
 * triangle (t24 sums to ~0.5, rows of t0 and t25 sum to ~1, rows of t39 and
 * t40 sum to ~0) -- confirm against the code generator before relying on
 * that interpretation.
 */
static inline void form_cell_integral_otherwise (double  A[18] , const double *const restrict *restrict coords , const double *const restrict *restrict w_0 , const double *const restrict *restrict w_1 , const double *const restrict *restrict w_2 , const double *restrict w_3 , const double *restrict w_4 , const double *restrict w_5 , const double *const restrict *restrict w_6 , const double *const restrict *restrict w_7 , const double *const restrict *restrict w_8 )
{
  /* Per-point scratch (index ip = 0..6), filled in the first ip loop and
     re-read in the second one; t33 is a 3-entry accumulator. */
  double  t26[7] ;
  double  t27[7] ;
  double  t28[7] ;
  double  t29[7] ;
  double  t30[7] ;
  double  t33[3] ;
  /* 7x3 table indexed [ip][j].
     NOTE(review): presumably 3 linear basis functions tabulated at the 7
     points -- confirm. */
  static const double  t0[7][3]  = {{0.333333333333333, 0.333333333333333, 0.333333333333333}, 
  {0.101286507323456, 0.797426985353087, 0.101286507323456}, 
  {0.101286507323457, 0.101286507323456, 0.797426985353087}, 
  {0.797426985353087, 0.101286507323456, 0.101286507323456}, 
  {0.470142064105115, 0.0597158717897698, 0.470142064105115}, 
  {0.470142064105115, 0.470142064105115, 0.0597158717897697}, 
  {0.0597158717897699, 0.470142064105115, 0.470142064105115}};
  /*
   * Cell-constant scalars:
   *   t5 = coords[1][0] - coords[0][0]     t4 = coords[2][0] - coords[0][0]
   *   t8 = coords[1][1] - coords[0][1]     t7 = coords[2][1] - coords[0][1]
   *   t9  = t5*t7 - t4*t8, i.e. the determinant of [[t5, t4], [t8, t7]]
   *   t10 = 1 / t9 (no guard against t9 == 0)
   *   [[t19, t11], [t20, t13]] = [[t7, -t4], [-t8, t5]] / t9, i.e. the
   *   inverse of that 2x2 matrix.
   *   t2, t12  : w_2[1][0] - w_2[0][0], w_2[2][0] - w_2[0][0]
   *   t16, t17 : w_8[1][0] - w_8[0][0], w_8[2][0] - w_8[0][0]
   *   t14 = t2*t11 + t12*t13      t21 = t2*t19 + t12*t20
   *   t18 = t16*t11 + t17*t13     t22 = t16*t19 + t17*t20
   */
  double  t1  = (-1 * w_2[0][0]);
  double  t2  = (t1 + w_2[1][0]);
  double  t3  = (-1 * coords[0][0]);
  double  t4  = (t3 + coords[2][0]);
  double  t5  = (t3 + coords[1][0]);
  double  t6  = (-1 * coords[0][1]);
  double  t7  = (t6 + coords[2][1]);
  double  t8  = (t6 + coords[1][1]);
  double  t9  = ((t5 * t7) + (-1 * (t4 * t8)));
  double  t10  = (1 / t9);
  double  t11  = ((-1 * t4) * t10);
  double  t12  = (t1 + w_2[2][0]);
  double  t13  = (t5 * t10);
  double  t14  = ((t2 * t11) + (t12 * t13));
  double  t15  = (-1 * w_8[0][0]);
  double  t16  = (t15 + w_8[1][0]);
  double  t17  = (t15 + w_8[2][0]);
  double  t18  = ((t16 * t11) + (t17 * t13));
  double  t19  = (t7 * t10);
  double  t20  = ((-1 * t8) * t10);
  double  t21  = ((t2 * t19) + (t12 * t20));
  double  t22  = ((t16 * t19) + (t17 * t20));
  /* t31 accumulates the sum of t30[ip] over all 7 points. */
  double  t31  = 0.0;
  /* Absolute value of the determinant; scales every per-point weight. */
  double  t23  = fabs(t9);
  /* 7 per-point weights. NOTE(review): presumably quadrature weights -- confirm. */
  static const double  t24[7]  = {0.1125, 0.0629695902724136, 0.0629695902724136, 0.0629695902724136, 0.0661970763942531, 0.0661970763942531, 0.0661970763942531};
  /* 7x6 table indexed [ip][i_0]; contracted with the 6 rows of w_0 / w_6
     and also used as the coefficient of t63 / t57 in the final update.
     NOTE(review): presumably 6 quadratic basis functions at the 7 points
     -- confirm. */
  static const double  t25[7][6]  = {{-0.111111111111111, -0.111111111111111, -0.111111111111111, 0.444444444444444, 0.444444444444444, 0.444444444444445}, 
  {-0.0807685941918872, 0.474352608585538, -0.0807685941918872, 0.323074376767549, 0.0410358262631382, 0.323074376767549}, 
  {-0.0807685941918872, -0.0807685941918872, 0.474352608585538, 0.323074376767549, 0.323074376767549, 0.0410358262631385}, 
  {0.474352608585539, -0.0807685941918871, -0.0807685941918872, 0.0410358262631383, 0.323074376767549, 0.323074376767549}, 
  {-0.0280749432230787, -0.0525839011025454, -0.0280749432230788, 0.112299772892315, 0.884134241764072, 0.112299772892315}, 
  {-0.0280749432230787, -0.0280749432230788, -0.0525839011025453, 0.112299772892315, 0.112299772892315, 0.884134241764073}, 
  {-0.0525839011025453, -0.0280749432230788, -0.0280749432230788, 0.884134241764072, 0.112299772892315, 0.112299772892315}};
  
  /* Clear the 3-entry accumulator before the first point loop. */
  for (int  jc  = 0; jc < 3; jc += 1)
  {
    t33[jc] = 0.0;
    
  }
  
  /*
   * First pass over the 7 points. For each ip:
   *   t29/t27 = t25[ip][*] contracted with w_0[*][0] / w_0[*][1]
   *   t28/t26 = t25[ip][*] contracted with w_6[*][0] / w_6[*][1]
   *   t30     = t24[ip] * |t9|
   * and t33[jc] gathers t0[ip][jc] * t32.
   */
  for (int  ip  = 0; ip < 7; ip += 1)
  {
    t29[ip] = 0.0;
    t28[ip] = 0.0;
    t27[ip] = 0.0;
    t26[ip] = 0.0;
    
    for (int  i_0  = 0; i_0 < 6; i_0 += 1)
    {
      t26[ip] += t25[ip][i_0] * w_6[i_0][1];
      t27[ip] += t25[ip][i_0] * w_0[i_0][1];
      t28[ip] += t25[ip][i_0] * w_6[i_0][0];
      t29[ip] += t25[ip][i_0] * w_0[i_0][0];
      
    }
    t30[ip] = t24[ip] * t23;
    t31 += t30[ip];
    double  t32  = (t30[ip] * (((t22 * t29[ip]) + (t21 * t28[ip])) + ((t18 * t27[ip]) + (t14 * t26[ip]))));
    
    for (int  jc  = 0; jc < 3; jc += 1)
    {
      t33[jc] += t0[ip][jc] * t32;
      
    }
    
  }
  /* Negated inverse-matrix entries, reused in the second point loop. */
  double  t34  = (-1 * t19);
  double  t35  = (-1 * t20);
  double  t36  = (-1 * t11);
  double  t37  = (-1 * t13);
  /* -(w_3[0] * w_4[0]); multiplies the t54 * w_5[...] terms below. */
  double  t38  = (-1 * (w_3[0] * w_4[0]));
  /* Two further 7x6 tables indexed [ip][i_0]. Their contractions are always
     combined as (t39-part * t19 + t40-part * t20) or
     (t39-part * t11 + t40-part * t13).
     NOTE(review): rows sum to ~0, which suggests derivative tabulations in
     two reference directions -- confirm. */
  static const double  t39[7][6]  = {{-0.333333333333333, 0.333333333333333, 0.0, 1.33333333333333, -1.33333333333333, 0.0}, 
  {0.594853970706176, 2.18970794141235, 0.0, 0.405146029293824, -0.405146029293824, -2.78456191211852}, 
  {0.594853970706174, -0.594853970706175, 0.0, 3.18970794141234, -3.18970794141234, 0.0}, 
  {-2.18970794141235, -0.594853970706178, 0.0, 0.405146029293824, -0.405146029293824, 2.78456191211853}, 
  {-0.880568256420461, -0.761136512840922, 0.0, 1.88056825642046, -1.88056825642046, 1.64170476926138}, 
  {-0.880568256420459, 0.880568256420459, 0.0, 0.238863487159078, -0.238863487159078, 0.0}, 
  {0.761136512840921, 0.88056825642046, 0.0, 1.88056825642046, -1.88056825642046, -1.64170476926138}};
  static const double  t40[7][6]  = {{-0.333333333333338, 0.0, 0.333333333333335, 1.33333333333332, 0.0, -1.33333333333333}, 
  {0.594853970706169, 0.0, -0.594853970706174, 3.18970794141234, 1.17187593902012e-14, -3.18970794141235}, 
  {0.594853970706169, 0.0, 2.18970794141235, 0.40514602929381, -2.78456191211851, -0.405146029293824}, 
  {-2.18970794141235, 0.0, -0.594853970706174, 0.405146029293815, 2.78456191211853, -0.405146029293824}, 
  {-0.880568256420466, 0.0, 0.880568256420462, 0.238863487159067, 0.0, -0.238863487159079}, 
  {-0.880568256420465, 0.0, -0.761136512840921, 1.88056825642045, 1.64170476926139, -1.88056825642046}, 
  {0.761136512840915, 0.0, 0.880568256420462, 1.88056825642044, -1.64170476926137, -1.88056825642046}};
  /* Reciprocal of w_4[0] (no guard against zero). */
  double  t41  = (1 / w_4[0]);
  /* t42 and t44 are point-independent: each is scaled by t31, the summed
     weights, and paired with the constant vectors t43 / t45 below. */
  double  t42  = ((t41 * ((t22 * t20) + (t18 * t13))) * t31);
  static const double  t43[3]  = {-1.0, 0.0, 1.0};
  double  t44  = ((t41 * ((t22 * t19) + (t18 * t11))) * t31);
  static const double  t45[3]  = {-1.0, 1.0, 0.0};
  
  /* Update A[15..17] once, outside the point loop. */
  for (int  jc  = 0; jc < 3; jc += 1)
  {
    #pragma coffee expression
    A[jc+15] += (t33[jc] + (t45[jc] * t44)) + (t43[jc] * t42);
    
  }
  
  /*
   * Second pass over the 7 points; reuses t26..t30 from the first pass.
   * Updates A[12..14] and the interleaved pairs A[0..11].
   */
  for (int  ip  = 0; ip < 7; ip += 1)
  {
    /* t46..t53: t40 / t39 rows contracted with each component of w_6, w_0. */
    double  t53  = 0.0;
    double  t52  = 0.0;
    double  t51  = 0.0;
    double  t50  = 0.0;
    double  t49  = 0.0;
    double  t48  = 0.0;
    double  t47  = 0.0;
    double  t46  = 0.0;
    
    for (int  i_0  = 0; i_0 < 6; i_0 += 1)
    {
      t46 += t40[ip][i_0] * w_6[i_0][0];
      t47 += t39[ip][i_0] * w_6[i_0][0];
      t48 += t40[ip][i_0] * w_0[i_0][0];
      t49 += t39[ip][i_0] * w_0[i_0][0];
      t50 += t40[ip][i_0] * w_6[i_0][1];
      t51 += t39[ip][i_0] * w_6[i_0][1];
      t52 += t40[ip][i_0] * w_0[i_0][1];
      t53 += t39[ip][i_0] * w_0[i_0][1];
      
    }
    /* t54: t0[ip][*] combined with w_8[*][0]; t58 (below): same with w_7[*][0]. */
    double  t54  = (((t0[ip][0] * w_8[0][0]) + (t0[ip][1] * w_8[1][0])) + (t0[ip][2] * w_8[2][0]));
    /* t55, t56: the w_6 component-1 contractions (t51, t50) multiplied through
       the inverse-matrix columns (t19, t20) and (t11, t13). */
    double  t55  = ((t51 * t19) + (t50 * t20));
    double  t56  = ((t51 * t11) + (t50 * t13));
    /* t57, t59, t60 feed the odd entries A[2*ja0 + 1]. */
    double  t57  = (t30[ip] * ((t38 * (t54 * w_5[1])) + (((t28[ip] * ((t53 * t19) + (t52 * t20))) + (t29[ip] * t55)) + ((t26[ip] * ((t53 * t11) + (t52 * t13))) + (t27[ip] * t56)))));
    double  t58  = (((t0[ip][0] * w_7[0][0]) + (t0[ip][1] * w_7[1][0])) + (t0[ip][2] * w_7[2][0]));
    double  t59  = (t30[ip] * (((t37 * t58) + (t55 * t20)) + (t56 * t13)));
    double  t60  = (t30[ip] * (((t36 * t58) + (t55 * t19)) + (t56 * t11)));
    /* t61, t62: same construction as t55, t56 but for w_6 component 0. */
    double  t61  = ((t47 * t19) + (t46 * t20));
    double  t62  = ((t47 * t11) + (t46 * t13));
    /* t63, t64, t65 feed the even entries A[2*ja0 + 0]. */
    double  t63  = (t30[ip] * ((t38 * (t54 * w_5[0])) + (((t28[ip] * ((t49 * t19) + (t48 * t20))) + (t29[ip] * t61)) + ((t26[ip] * ((t49 * t11) + (t48 * t13))) + (t27[ip] * t62)))));
    double  t64  = (t30[ip] * (((t35 * t58) + (t61 * t20)) + (t62 * t13)));
    double  t65  = (t30[ip] * (((t34 * t58) + (t61 * t19)) + (t62 * t11)));
    /* t66 = (t61 + t56) * weight; distributed over A[12..14] via t0. */
    double  t66  = ((t61 + t56) * t30[ip]);
    
    for (int  jb  = 0; jb < 3; jb += 1)
    {
      #pragma coffee expression
      A[jb+12] += t0[ip][jb] * t66;
      
    }
    
    /* Entries 0..11 are stored interleaved: even index uses (t65, t64, t63),
       odd index uses (t60, t59, t57). */
    for (int  ja0  = 0; ja0 < 6; ja0 += 1)
    {
      #pragma coffee expression
      A[ja0*2+0] += ((t39[ip][ja0] * t65) + (t40[ip][ja0] * t64)) + (t25[ip][ja0] * t63);
      #pragma coffee expression
      A[ja0*2+1] += ((t39[ip][ja0] * t60) + (t40[ip][ja0] * t59)) + (t25[ip][ja0] * t57);
      
    }
    
  }
  
}

Loopy Kernel

ARGUMENTS:
A_0: GlobalArg, type: <auto/runtime>, shape: (6), dim_tags: (N0:stride:1)
A_1: GlobalArg, type: <auto/runtime>, shape: (6), dim_tags: (N0:stride:1)
A_2: GlobalArg, type: <auto/runtime>, shape: (3), dim_tags: (N0:stride:1)
A_3: GlobalArg, type: <auto/runtime>, shape: (3), dim_tags: (N0:stride:1)
coords: GlobalArg, type: <auto/runtime>, shape: (3, 2), dim_tags: (N1:stride:2, N0:stride:1)
w_0: GlobalArg, type: <auto/runtime>, shape: (6, 2), dim_tags: (N1:stride:2, N0:stride:1)
w_2: GlobalArg, type: <auto/runtime>, shape: (3, 1), dim_tags: (N1:stride:1, N0:stride:1)
w_3: ValueArg, type: <auto/runtime>
w_4: ValueArg, type: <auto/runtime>
w_5: ValueArg, type: <auto/runtime>
w_6: GlobalArg, type: <auto/runtime>, shape: (6, 2), dim_tags: (N1:stride:2, N0:stride:1)
w_7: GlobalArg, type: <auto/runtime>, shape: (3, 1), dim_tags: (N1:stride:1, N0:stride:1)
w_8: GlobalArg, type: <auto/runtime>, shape: (3, 1), dim_tags: (N1:stride:1, N0:stride:1)
---------------------------------------------------------------------------
SUBSTITUTION RULES:
sum_tmp(i33) := reduce(sum, [i38], cse_18[i33, i38])
sum_tmp_0(i33) := reduce(sum, [i38], cse_21[i33, i38])
sum_tmp_1(i33) := reduce(sum, [i38], cse_40[i33, i38])
sum_tmp_2(i33) := reduce(sum, [i38], cse_42[i33, i38])
sum_tmp_3(i33) := reduce(sum, [i38], cse_44[i33, i38])
sum_tmp_4(i33) := reduce(sum, [i38], cse_46[i33, i38])
sum_tmp_5(i33) := reduce(sum, [i38], cse_48[i33, i38])
sum_tmp_6(i33) := reduce(sum, [i38], cse_50[i33, i38])
sum_tmp_7(i201) := reduce(sum, [i33], cnst_3[i33, i201]*cse_10[i33]*(cse_34*cse_39[i33]*w_5 + cse_41[i33]*(cse_43[i33]*cse_12 + cse_45[i33]*cse_24) + cse_47[i33]*cse_26[i33] + cse_49[i33]*(cse_43[i33]*cse_28 + cse_45[i33]*cse_30) + cse_51[i33]*cse_32[i33]) + cnst[i33, i201]*cse_10[i33]*(cse_32[i33]*cse_28 + cse_17[i33]*(-1.0)*cse_12 + cse_26[i33]*cse_12) + cnst_2[i33, i201]*cse_10[i33]*(cse_32[i33]*cse_30 + cse_17[i33]*(-1.0)*cse_24 + cse_26[i33]*cse_24))
sum_tmp_8(i33) := reduce(sum, [i38], cse_52[i33, i38])
sum_tmp_9(i33) := reduce(sum, [i38], cse_55[i33, i38])
sum_tmp_10(i33) := reduce(sum, [i38], cse_62[i33, i38])
sum_tmp_11(i33) := reduce(sum, [i38], cse_64[i33, i38])
sum_tmp_12(i201) := reduce(sum, [i33], cnst_3[i33, i201]*cse_10[i33]*(cse_34*cse_39[i33]*w_5 + cse_41[i33]*(cse_63[i33]*cse_12 + cse_65[i33]*cse_24) + cse_47[i33]*cse_58[i33] + cse_49[i33]*(cse_63[i33]*cse_28 + cse_65[i33]*cse_30) + cse_51[i33]*cse_61[i33]) + cnst[i33, i201]*cse_10[i33]*(cse_61[i33]*cse_28 + cse_17[i33]*(-1.0)*cse_28 + cse_58[i33]*cse_12) + cnst_2[i33, i201]*cse_10[i33]*(cse_61[i33]*cse_30 + cse_17[i33]*(-1.0)*cse_30 + cse_58[i33]*cse_24))
sum_tmp_13(i203) := reduce(sum, [i33], cnst_1[i33, i203]*(cse_26[i33] + cse_61[i33])*cse_10[i33])
sum_tmp_14(i204) := reduce(sum, [i33], cnst_1[i33, i204]*cse_10[i33]*(cse_71*cse_47[i33] + (cse_73*cse_12 + cse_74*cse_24)*cse_41[i33] + cse_77*cse_51[i33] + (cse_73*cse_28 + cse_74*cse_30)*cse_49[i33]))
sum_tmp_15() := reduce(sum, [i33], cse_10[i33])
---------------------------------------------------------------------------
INSTRUCTIONS:
↱↱                    cse = (-1.0)*coords[0, 0]  {id=insn, tags=cse}
└│↱↱                  cse_0 = cse + coords[1, 0]  {id=insn_0, tags=cse}
↱│││↱                 cse_1 = (-1.0)*coords[0, 1]  {id=insn_1, tags=cse}
└││││↱↱               cse_2 = cse_1 + coords[2, 1]  {id=insn_2, tags=cse}
↱│└││└│               cse_3 = cse_0*cse_2  {id=insn_3, tags=cse}
│└↱││↱│               cse_4 = cse + coords[2, 0]  {id=insn_4, tags=cse}
│↱││└││↱              cse_5 = cse_1 + coords[1, 1]  {id=insn_5, tags=cse}
│└└│↱│││              cse_6 = cse_4*cse_5  {id=insn_6, tags=cse}
│↱ │└│││              cse_7 = (-1.0)*cse_6  {id=insn_7, tags=cse}
└└↱│↱│││              cse_8 = cse_3 + cse_7  {id=insn_8, tags=cse}
↱ └│││││              cse_9 = abs(cse_8)  {id=insn_9, tags=cse}
│  │││││              for i33_0
└↱↱│││││↱↱↱             cse_10[i33_0] = cnst_0[i33_0]*cse_9  {id=insn_10, tags=cse}
 ││││││││││           end i33_0
↱│││└││││││↱↱↱        cse_11 = 1.0 / cse_8  {id=insn_11, tags=cse}
└│││↱│└│││││││↱↱↱↱↱   cse_12 = cse_2*cse_11  {id=insn_12, tags=cse}
 │││││ ││││││││││││   for i33_1
↱│││││ ││││││││││││     cse_13[i33_1] = cnst_1[i33_1, 0]*w_7[0, 0]  {id=insn_13, tags=cse}
││││││ ││││││││││││   end i33_1
││││││ ││││││││││││   for i33_2
││││││↱││││││││││││     cse_14[i33_2] = cnst_1[i33_2, 1]*w_7[1, 0]  {id=insn_14, tags=cse}
│││││││││││││││││││   end i33_2
│││││││││││││││││││   for i33_3
└│││││└││││││││││││↱    cse_15[i33_3] = cse_13[i33_3] + cse_14[i33_3]  {id=insn_15, tags=cse}
 │││││ │││││││││││││  end i33_3
 │││││ │││││││││││││  for i33_4
↱│││││ │││││││││││││    cse_16[i33_4] = cnst_1[i33_4, 2]*w_7[2, 0]  {id=insn_16, tags=cse}
││││││ │││││││││││││  end i33_4
││││││ │││││││││││││  for i33_5
└│││││↱││││││││││││└…   cse_17[i33_5] = cse_15[i33_5] + cse_16[i33_5]  {id=insn_17, tags=cse}
 ││││││││││││││││││ … end i33_5
 ││││││││││││││││││ … for i38_0, i33_6
↱││││││││││││││││││ …     cse_18[i33_6, i38_0] = cnst[i33_6, i38_0]*w_6[i38_0, 0]  {id=insn_18, tags=cse}
│││││││││││││││││││ … end i38_0, i33_6
│││││││││││││││││││ … for i33_7
└││││││││││││││││││↱…   cse_19[i33_7] = sum_tmp(i33_7)  {id=insn_19, tags=cse}
 │││││││││││││││││││… end i33_7
 │││││││││││││││││││… for i33_8
↱│││└││││││││││││││└…   cse_20[i33_8] = cse_19[i33_8]*cse_12  {id=insn_20, tags=cse}
││││ ││││││││││││││ … end i33_8
││││ ││││││││││││││ … for i38_1, i33_9
││││↱││││││││││││││ …     cse_21[i33_9, i38_1] = cnst_2[i33_9, i38_1]*w_6[i38_1, 0]  {id=insn_21, tags=cse}
│││││││││││││││││││ … end i38_1, i33_9
│││││││││││││││││││ … for i33_10
││││└││││││││││││││↱…   cse_22[i33_10] = sum_tmp_0(i33_10)  {id=insn_22, tags=cse}
││││ │││││││││││││││… end i33_10
││││↱││└││││││││││││… cse_23 = (-1.0)*cse_5  {id=insn_23, tags=cse}
││││└││↱│││└││││││││… cse_24 = cse_23*cse_11  {id=insn_24, tags=cse}
││││ ││││││ ││││││││… for i33_11
││││↱││└│││ │││││││└…   cse_25[i33_11] = cse_22[i33_11]*cse_24  {id=insn_25, tags=cse}
│││││││ │││ │││││││ … end i33_11
│││││││ │││ │││││││ … for i33_12
└│││└││↱│││↱│││││││ …   cse_26[i33_12] = cse_20[i33_12] + cse_25[i33_12]  {id=insn_26, tags=cse}
 │││ ││││││││││││││ … end i33_12
↱│││ └│││││││││││││ … cse_27 = (-1.0)*cse_4  {id=insn_27, tags=cse}
└│││↱↱││││││└││││││↱… cse_28 = cse_27*cse_11  {id=insn_28, tags=cse}
 │││││││││││ │││││││… for i33_13
↱│││└│││││││ │││││││…   cse_29[i33_13] = cse_19[i33_13]*cse_28  {id=insn_29, tags=cse}
││││ │││││││ │││││││… end i33_13
│││└↱│││││││↱└││││││… cse_30 = cse_0*cse_11  {id=insn_30, tags=cse}
│││ │││││││││ ││││││… for i33_14
│││↱└││││││││ ││││││…   cse_31[i33_14] = cse_22[i33_14]*cse_30  {id=insn_31, tags=cse}
││││ ││││││││ ││││││… end i33_14
││││ ││││││││ ││││││… for i33_15
└││└↱││││││││ ││││││…   cse_32[i33_15] = cse_29[i33_15] + cse_31[i33_15]  {id=insn_32, tags=cse}
 ││ │││││││││ ││││││… end i33_15
↱││ │││││││││ ││││││… cse_33 = w_3*w_4  {id=insn_33, tags=cse}
└││↱│││││││││↱││││││… cse_34 = (-1.0)*cse_33  {id=insn_34, tags=cse}
 │││││││││││││││││││… for i33_16
↱│││││││││││││││││││…   cse_35[i33_16] = cnst_1[i33_16, 0]*w_8[0, 0]  {id=insn_35, tags=cse}
││││││││││││││││││││… end i33_16
││││││││││││││││││││… for i33_17
││││││││││││││││││││…   cse_36[i33_17] = cnst_1[i33_17, 1]*w_8[1, 0]  {id=insn_36, tags=cse}
││││││││││││││││││││… end i33_17
││││││││││││││││││││… for i33_18
└│││││││││││││││││││…   cse_37[i33_18] = cse_35[i33_18] + cse_36[i33_18]  {id=insn_37, tags=cse}
 │││││││││││││││││││… end i33_18
 │││││││││││││││││││… for i33_19
↱│││││││││││││││││││…   cse_38[i33_19] = cnst_1[i33_19, 2]*w_8[2, 0]  {id=insn_38, tags=cse}
││││││││││││││││││││… end i33_19
││││││││││││││││││││… for i33_20
└│││││││││││││││││││…   cse_39[i33_20] = cse_37[i33_20] + cse_38[i33_20]  {id=insn_39, tags=cse}
 │││││││││││││││││││… end i33_20
 │││││││││││││││││││… for i33_21, i38_2
↱│││││││││││││││││││…     cse_40[i33_21, i38_2] = cnst_3[i33_21, i38_2]*w_6[i38_2, 0]  {id=insn_40, tags=cse}
││││││││││││││││││││… end i33_21, i38_2
││││││││││││││││││││… for i33_22
└│││││││││││││││││││…   cse_41[i33_22] = sum_tmp_1(i33_22)  {id=insn_41, tags=cse}
 │││││││││││││││││││… end i33_22
 │││││││││││││││││││… for i38_3, i33_23
↱│││││││││││││││││││…     cse_42[i33_23, i38_3] = cnst[i33_23, i38_3]*w_0[i38_3, 0]  {id=insn_42, tags=cse}
││││││││││││││││││││… end i38_3, i33_23
││││││││││││││││││││… for i33_24
└│││││││││││││││││││…   cse_43[i33_24] = sum_tmp_2(i33_24)  {id=insn_43, tags=cse}
 │││││││││││││││││││… end i33_24
 │││││││││││││││││││… for i38_4, i33_25
↱│││││││││││││││││││…     cse_44[i33_25, i38_4] = cnst_2[i33_25, i38_4]*w_0[i38_4, 0]  {id=insn_44, tags=cse}
││││││││││││││││││││… end i38_4, i33_25
││││││││││││││││││││… for i33_26
└│││││││││││││││││││…   cse_45[i33_26] = sum_tmp_3(i33_26)  {id=insn_45, tags=cse}
 │││││││││││││││││││… end i33_26
 │││││││││││││││││││… for i38_5, i33_27
↱│││││││││││││││││││…     cse_46[i33_27, i38_5] = cnst_3[i33_27, i38_5]*w_0[i38_5, 0]  {id=insn_46, tags=cse}
││││││││││││││││││││… end i38_5, i33_27
││││││││││││││││││││… for i33_28
└│││││││││││││││││││…   cse_47[i33_28] = sum_tmp_4(i33_28)  {id=insn_47, tags=cse}
 │││││││││││││││││││… end i33_28
 │││││││││││││││││││… for i38_6, i33_29
↱│││││││││││││││││││…     cse_48[i33_29, i38_6] = cnst_3[i33_29, i38_6]*w_6[i38_6, 1]  {id=insn_48, tags=cse}
││││││││││││││││││││… end i38_6, i33_29
││││││││││││││││││││… for i33_30
└│││││││││││││││││││…   cse_49[i33_30] = sum_tmp_5(i33_30)  {id=insn_49, tags=cse}
 │││││││││││││││││││… end i33_30
 │││││││││││││││││││… for i33_31, i38_7
↱│││││││││││││││││││…     cse_50[i33_31, i38_7] = cnst_3[i33_31, i38_7]*w_0[i38_7, 1]  {id=insn_50, tags=cse}
││││││││││││││││││││… end i33_31, i38_7
││││││││││││││││││││… for i33_32
└│││││││││││││││││││…   cse_51[i33_32] = sum_tmp_6(i33_32)  {id=insn_51, tags=cse}
 │││││││││││││││││││… end i33_32
 │││││││││││││││││││… for i38_8, i33_33
↱│││││││││││││││││││…     cse_52[i33_33, i38_8] = cnst[i33_33, i38_8]*w_6[i38_8, 1]  {id=insn_52, tags=cse}
││││││││││││││││││││… end i38_8, i33_33
││││││││││││││││││││… for i33_34
└│││││││││││││││││││…   cse_53[i33_34] = sum_tmp_8(i33_34)  {id=insn_53, tags=cse}
 │││││││││││││││││││… end i33_34
 │││││││││││││││││││… for i33_35
↱│││││││││││││└│││││…   cse_54[i33_35] = cse_53[i33_35]*cse_12  {id=insn_54, tags=cse}
││││││││││││││ │││││… end i33_35
││││││││││││││ │││││… for i33_36, i38_9
││││││││││││││↱│││││…     cse_55[i33_36, i38_9] = cnst_2[i33_36, i38_9]*w_6[i38_9, 1]  {id=insn_55, tags=cse}
││││││││││││││││││││… end i33_36, i38_9
││││││││││││││││││││… for i33_37
││││││││││││││└│││││…   cse_56[i33_37] = sum_tmp_9(i33_37)  {id=insn_56, tags=cse}
││││││││││││││ │││││… end i33_37
││││││││││││││ │││││… for i33_38
││││││││││││││↱│││││…   cse_57[i33_38] = cse_56[i33_38]*cse_24  {id=insn_57, tags=cse}
││││││││││││││││││││… end i33_38
││││││││││││││││││││… for i33_39
└│││││││││││││└│││││…   cse_58[i33_39] = cse_54[i33_39] + cse_57[i33_39]  {id=insn_58, tags=cse}
 │││││││││││││ │││││… end i33_39
 │││││││││││││ │││││… for i33_40
↱││││└││││││││ │││││…   cse_59[i33_40] = cse_53[i33_40]*cse_28  {id=insn_59, tags=cse}
│││││ ││││││││ │││││… end i33_40
│││││ ││││││││ │││││… for i33_41
│││││↱││││││└│ │││││…   cse_60[i33_41] = cse_56[i33_41]*cse_30  {id=insn_60, tags=cse}
││││││││││││ │ │││││… end i33_41
││││││││││││ │ │││││… for i33_42
└││││└││││││↱│↱│││││…   cse_61[i33_42] = cse_59[i33_42] + cse_60[i33_42]  {id=insn_61, tags=cse}
 ││││ ││││││││││││││… end i33_42
 ││││ ││││││││││││││… for i33_43, i38_10
↱││││ ││││││││││││││…     cse_62[i33_43, i38_10] = cnst[i33_43, i38_10]*w_0[i38_10, 1]  {id=insn_62, tags=cse}
│││││ ││││││││││││││… end i33_43, i38_10
│││││ ││││││││││││││… for i33_44
└││││↱││││││││││││││…   cse_63[i33_44] = sum_tmp_10(i33_44)  {id=insn_63, tags=cse}
 │││││││││││││││││││… end i33_44
 │││││││││││││││││││… for i33_45, i38_11
↱│││││││││││││││││││…     cse_64[i33_45, i38_11] = cnst_2[i33_45, i38_11]*w_0[i38_11, 1]  {id=insn_64, tags=cse}
││││││││││││││││││││… end i33_45, i38_11
││││││││││││││││││││… for i33_46
└│││││││││││││││││││…   cse_65[i33_46] = sum_tmp_11(i33_46)  {id=insn_65, tags=cse}
 │││││││││││││││││││… end i33_46
↱│││││││││││││││││││… cse_66 = (-1.0)*w_8[0, 0]  {id=insn_66, tags=cse}
└│││││││││││││││││││… cse_67 = cse_66 + w_8[1, 0]  {id=insn_67, tags=cse}
↱││││││││││││││└││││… cse_68 = cse_67*cse_12  {id=insn_68, tags=cse}
│││││││││││││││↱││││… cse_69 = cse_66 + w_8[2, 0]  {id=insn_69, tags=cse}
│││││││││││││││└││││… cse_70 = cse_69*cse_24  {id=insn_70, tags=cse}
└││││││││││││││↱││││… cse_71 = cse_68 + cse_70  {id=insn_71, tags=cse}
↱│││││││││││││││││││… cse_72 = (-1.0)*w_2[0, 0]  {id=insn_72, tags=cse}
└│││││││││││││││││││… cse_73 = cse_72 + w_2[1, 0]  {id=insn_73, tags=cse}
↱│││││││││││││││││││… cse_74 = cse_72 + w_2[2, 0]  {id=insn_74, tags=cse}
│││││││││││││││││││└… cse_75 = cse_67*cse_28  {id=insn_75, tags=cse}
│││││││││││││││││││↱… cse_76 = cse_69*cse_30  {id=insn_76, tags=cse}
│││││││││││││││││││└… cse_77 = cse_75 + cse_76  {id=insn_77, tags=cse}
│││││││││││││││││││↱… cse_78 = 1.0 / w_4  {id=insn_78, tags=cse}
│└││││││││││││││││││… cse_79 = sum_tmp_15  {id=insn_79, tags=cse}
│ ││││││││││││││││││… for i201
│ └└└│└└││││││││└│││…   A_0[i201] = sum_tmp_7(i201)  {id=insn_80}
│    └  └│││└└││ └││…   A_1[i201] = sum_tmp_12(i201)  {id=insn_81}
│        │││  ││  ││… end i201
│        │││  ││  ││… for i203
│        └│└  └│  ││…   A_2[i203] = sum_tmp_13(i203)  {id=insn_82}
│         │    │  ││… end i203
│         │    │  ││… for i204
└         └    └  └└…   A_3[i204] = cnst_5[i204]*cse_79*cse_78*(cse_71*cse_24 + cse_77*cse_30) + sum_tmp_14(i204) + cnst_4[i204]*cse_79*cse_78*(cse_71*cse_12 + cse_77*cse_28)  {id=insn_83}
                    … end i204
---------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment