Created
January 6, 2018 00:18
-
-
Save kaushikcfd/6f8c1f0d845f75f2958d1b51f624d971 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --------------------------------------------------------------------------- | |
| KERNEL: loopy_kernel_and_loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel | |
| --------------------------------------------------------------------------- | |
| ARGUMENTS: | |
| A0_global: GlobalArg, type: np_atomic:dtype('float64'), shape: (A0_size), dim_tags: (N0:stride:1) | |
| A0_size: ValueArg, type: np:dtype('int32') | |
| A1_global: GlobalArg, type: np_atomic:dtype('float64'), shape: (A1_size, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
| A1_size: ValueArg, type: np:dtype('int32') | |
| coords_global: GlobalArg, type: np:dtype('float64'), shape: (coords_global_len, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
| coords_global_len: ValueArg, type: np:dtype('int32') | |
| ltg_0: GlobalArg, type: np:dtype('int32'), shape: (nelements, 3), dim_tags: (N1:stride:3, N0:stride:1) | |
| ltg_1: GlobalArg, type: np:dtype('int32'), shape: (nelements, 3), dim_tags: (N1:stride:3, N0:stride:1) | |
| nelements: ValueArg, type: np:dtype('int32') | |
| w_0_global: GlobalArg, type: np:dtype('float64'), shape: (w_0_global_len), dim_tags: (N0:stride:1) | |
| w_0_global_len: ValueArg, type: np:dtype('int32') | |
| w_1_global: GlobalArg, type: np:dtype('float64'), shape: (w_1_global_len, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
| w_1_global_len: ValueArg, type: np:dtype('int32') | |
| --------------------------------------------------------------------------- | |
| DOMAINS: | |
| [A0_size, A1_size, nelements] -> { [dim_init_1, ibf_gather_0, ibf_gather_1, ibf_gather_2, ibf_scat_0, idim_scat_0, ibf_scat_1, idim_scat_1, ibf_scat_2, idim_scat_2, i_init_0_outer, i_init_0_inner, i_init_1_outer, i_init_1_inner, iel_outer, iel_inner] : idim_scat_1 = 0 and 0 <= dim_init_1 <= 1 and 0 <= ibf_gather_0 <= 2 and 0 <= ibf_gather_1 <= 2 and 0 <= ibf_gather_2 <= 2 and 0 <= ibf_scat_0 <= 2 and 0 <= idim_scat_0 <= 1 and 0 <= ibf_scat_1 <= 2 and 0 <= ibf_scat_2 <= 2 and 0 <= idim_scat_2 <= 1 and i_init_0_inner >= 0 and -32i_init_0_outer <= i_init_0_inner <= 31 and i_init_0_inner < A0_size - 32i_init_0_outer and i_init_1_inner >= 0 and -32i_init_1_outer <= i_init_1_inner <= 31 and i_init_1_inner < A1_size - 32i_init_1_outer and iel_inner >= 0 and -32iel_outer <= iel_inner <= 31 and iel_inner < nelements - 32iel_outer } | |
| { [i16, i112, i113, i112_0, i113_0, i113_1, i16_0, i16_1, i16_2] : 0 <= i16 <= 2 and 0 <= i112 <= 2 and 0 <= i113 <= 2 and 0 <= i112_0 <= 2 and 0 <= i113_0 <= 2 and 0 <= i113_1 <= 2 and 0 <= i16_0 <= 2 and 0 <= i16_1 <= 2 and 0 <= i16_2 <= 2 } | |
| --------------------------------------------------------------------------- | |
| INAME IMPLEMENTATION TAGS: | |
| dim_init_1: unr | |
| i16: None | |
| i16_0: None | |
| i16_1: None | |
| i16_2: None | |
| i112: None | |
| i112_0: None | |
| i113: None | |
| i113_0: None | |
| i113_1: None | |
| i_init_0_inner: l.0 | |
| i_init_0_outer: g.0 | |
| i_init_1_inner: l.0 | |
| i_init_1_outer: g.0 | |
| ibf_gather_0: None | |
| ibf_gather_1: None | |
| ibf_gather_2: None | |
| ibf_scat_0: None | |
| ibf_scat_1: None | |
| ibf_scat_2: None | |
| idim_scat_0: None | |
| idim_scat_1: None | |
| idim_scat_2: None | |
| iel_inner: l.0 | |
| iel_outer: g.0 | |
| --------------------------------------------------------------------------- | |
| TEMPORARIES: | |
| acc_i16: type: np:dtype('float64'), shape: () scope:private | |
| acc_i16_0: type: np:dtype('float64'), shape: () scope:private | |
| acc_i16_1: type: np:dtype('float64'), shape: () scope:private | |
| acc_i16_2: type: np:dtype('float64'), shape: () scope:private | |
| cnst: type: np:dtype('float64'), shape: (3, 3), dim_tags: (N1:stride:3, N0:stride:1) scope:global | |
| cnst_0: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
| cnst_1: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
| cnst_2: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
| cse: type: np:dtype('float64'), shape: () scope:private | |
| cse_0: type: np:dtype('float64'), shape: () scope:private | |
| cse_1: type: np:dtype('float64'), shape: () scope:private | |
| cse_2: type: np:dtype('float64'), shape: () scope:private | |
| cse_3: type: np:dtype('float64'), shape: () scope:private | |
| cse_4: type: np:dtype('float64'), shape: () scope:private | |
| cse_5: type: np:dtype('float64'), shape: () scope:private | |
| cse_6: type: np:dtype('float64'), shape: () scope:private | |
| cse_7: type: np:dtype('float64'), shape: () scope:private | |
| cse_8: type: np:dtype('float64'), shape: () scope:private | |
| cse_9: type: np:dtype('float64'), shape: () scope:private | |
| cse_10: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:private | |
| cse_11: type: np:dtype('float64'), shape: () scope:private | |
| cse_12: type: np:dtype('float64'), shape: () scope:private | |
| cse_13: type: np:dtype('float64'), shape: () scope:private | |
| cse_14: type: np:dtype('float64'), shape: () scope:private | |
| cse_15: type: np:dtype('float64'), shape: () scope:private | |
| cse_16: type: np:dtype('float64'), shape: () scope:private | |
| cse_17: type: np:dtype('float64'), shape: () scope:private | |
| cse_18: type: np:dtype('float64'), shape: () scope:private | |
| cse_19: type: np:dtype('float64'), shape: () scope:private | |
| cse_20: type: np:dtype('float64'), shape: () scope:private | |
| cse_21: type: np:dtype('float64'), shape: () scope:private | |
| cse_22: type: np:dtype('float64'), shape: () scope:private | |
| cse_23: type: np:dtype('float64'), shape: () scope:private | |
| cse_24: type: np:dtype('float64'), shape: () scope:private | |
| cse_25: type: np:dtype('float64'), shape: () scope:private | |
| cse_26: type: np:dtype('float64'), shape: () scope:private | |
| cse_27: type: np:dtype('float64'), shape: () scope:private | |
| sum_tmp_0: type: np:dtype('float64'), shape: (i112_0:3), dim_tags: (N0:stride:1) scope:private | |
| sum_tmp_0_0: type: np:dtype('float64'), shape: () scope:private | |
| sum_tmp_1_0: type: np:dtype('float64'), shape: (i113_0:3), dim_tags: (N0:stride:1) scope:private | |
| sum_tmp_2_0: type: np:dtype('float64'), shape: (i113_1:3), dim_tags: (N0:stride:1) scope:private | |
| --------------------------------------------------------------------------- | |
| INSTRUCTIONS: | |
| for i_init_0_outer, i_init_0_inner | |
| ↱↱ A0_global[i_init_0_inner + i_init_0_outer*32] = 0.0 {id=init_0, tags=init} | |
| ││ end i_init_0_outer, i_init_0_inner | |
| ││ for i_init_1_inner, i_init_1_outer, dim_init_1 | |
| ││↱↱↱ A1_global[i_init_1_inner + i_init_1_outer*32, dim_init_1] = 0.0 {id=init_1, tags=init} | |
| │││││ end i_init_1_inner, i_init_1_outer, dim_init_1 | |
| └│└││↱↱↱↱↱↱↱↱↱↱↱↱↱↱↱… ... gbarrier {id=gb1} | |
| │ │││││││││││││││││… for i113_1, iel_inner, iel_outer | |
| ↱│ ││└││││││││││││││… acc_i16 = 0 {id=sum_tmp_2_i16_init} | |
| ││ ││ ││││││││││││││… end i113_1 | |
| ││↱││↱└│││││││││││││… cse = (-1.0)*coords_global[ltg_0[iel_inner + iel_outer*32, 0], 0] {id=insn_0, tags=cse:formknl} | |
| ││└│││↱└││││││││││││… cse_0 = cse + coords_global[ltg_0[iel_inner + iel_outer*32, 1], 0] {id=insn_0_0, tags=cse:formknl} | |
| ││↱││││↱└│││││││││││… cse_1 = (-1.0)*coords_global[ltg_0[iel_inner + iel_outer*32, 0], 1] {id=insn_1, tags=cse:formknl} | |
| ││└│││││↱└││││││││││… cse_2 = cse_1 + coords_global[ltg_0[iel_inner + iel_outer*32, 2], 1] {id=insn_2, tags=cse:formknl} | |
| ││↱│││└│└ └│││││││││… cse_3 = cse_0*cse_2 {id=insn_3, tags=cse:formknl} | |
| │││││└↱│↱ └││││││││… cse_4 = cse + coords_global[ltg_0[iel_inner + iel_outer*32, 2], 0] {id=insn_4, tags=cse:formknl} | |
| │││││↱│└│↱ └│││││││… cse_5 = cse_1 + coords_global[ltg_0[iel_inner + iel_outer*32, 1], 1] {id=insn_5, tags=cse:formknl} | |
| │││││└└↱││ └││││││… cse_6 = cse_4*cse_5 {id=insn_6, tags=cse:formknl} | |
| │││││↱ └││ └│││││… cse_7 = (-1.0)*cse_6 {id=insn_7, tags=cse:formknl} | |
| ││└││└↱↱││ └││││… cse_8 = cse_3 + cse_7 {id=insn_8, tags=cse:formknl} | |
| ││↱││ └│││ └│││… cse_9 = abs(cse_8) {id=insn_9, tags=cse:formknl} | |
| │││││ │││ │││… for i16 | |
| ││└││↱↱│││↱↱↱↱↱↱↱└││… cse_10[i16] = cnst_0[i16]*cse_9 {id=insn_10, tags=cse:formknl} | |
| ││ ││││││││││││││ ││… for i113_1 | |
| └│↱││└│││││││││││ └│… acc_i16 = acc_i16 + cnst[i16, i113_1]*cse_10[i16]*(cnst[i16, 2]*w_1_global[ltg_0[iel_inner + iel_outer*32, 2], 1] + cnst[i16, 0]*w_1_global[ltg_0[iel_inner + iel_outer*32, 0], 1] + cnst[i16, 1]*w_1_global[ltg_0[iel_inner + iel_outer*32, 1], 1]) {id=sum_tmp_2_i16_update} | |
| ││││ │││││││││││ │… end i16 | |
| ↱│└││ └││││││││││ └… sum_tmp_2_0[i113_1] = acc_i16 {id=sum_tmp_2_0, tags=formknl} | |
| ││ ││ ││││││││││ … end i113_1 | |
| ││ ││ ││││││││││ … for i113_0 | |
| ││↱││ ││││││││││ … acc_i16_0 = 0 {id=sum_tmp_1_i16_0_init} | |
| │││││ ││││││││││ … for i16_0 | |
| ││└││↱ │││└││││││ … acc_i16_0 = acc_i16_0 + cnst[i16_0, i113_0]*cse_10[i16_0]*(cnst[i16_0, 2]*w_1_global[ltg_0[iel_inner + iel_outer*32, 2], 0] + cnst[i16_0, 0]*w_1_global[ltg_0[iel_inner + iel_outer*32, 0], 0] + cnst[i16_0, 1]*w_1_global[ltg_0[iel_inner + iel_outer*32, 1], 0]) {id=sum_tmp_1_i16_0_update} | |
| ││ │││ │││ ││││││ … end i16_0 | |
| ││↱││└ │││ └│││││ … sum_tmp_1_0[i113_0] = acc_i16_0 {id=sum_tmp_1_0, tags=formknl} | |
| │││││ │││ │││││ … end i113_0 | |
| │││││ │││ │││││ … for i112_0 | |
| │││││↱ │││ │││││ … acc_i16_1 = 0 {id=sum_tmp_i16_1_init} | |
| ││││││ │││ │││││ … end i112_0 | |
| ││││││↱└││↱↱│││││↱ … cse_13 = 1.0 / cse_8 {id=insn_13, tags=cse:formknl} | |
| ││││││└↱││││││││││↱↱… cse_14 = cse_2*cse_13 {id=insn_14, tags=cse:formknl} | |
| ││││││↱││└││││││││││… cse_17 = (-1.0)*cse_5 {id=insn_17, tags=cse:formknl} | |
| ││││││└││↱└│││││││││… cse_18 = cse_17*cse_13 {id=insn_18, tags=cse:formknl} | |
| ││││││↱│││↱│││││││││… cse_11 = (-1.0)*w_0_global[ltg_1[iel_inner + iel_outer*32, 0]] {id=insn_11, tags=cse:formknl} | |
| ││││││└│││││││││││││… cse_12 = cse_11 + w_0_global[ltg_1[iel_inner + iel_outer*32, 1]] {id=insn_12, tags=cse:formknl} | |
| ││││││↱└││││││││││││… cse_15 = cse_12*cse_14 {id=insn_15, tags=cse:formknl} | |
| │││││││↱││└│││││││││… cse_16 = cse_11 + w_0_global[ltg_1[iel_inner + iel_outer*32, 2]] {id=insn_16, tags=cse:formknl} | |
| │││││││└│└↱│││││││││… cse_19 = cse_16*cse_18 {id=insn_19, tags=cse:formknl} | |
| ││││││└↱│↱└│││││││││… cse_20 = cse_15 + cse_19 {id=insn_20, tags=cse:formknl} | |
| ││││││↱│└│ │││││││││… cse_21 = (-1.0)*cse_4 {id=insn_21, tags=cse:formknl} | |
| ││││││└│↱│↱└││││││││… cse_22 = cse_21*cse_13 {id=insn_22, tags=cse:formknl} | |
| ││││││↱││││↱│││││└││… cse_24 = cse_0*cse_13 {id=insn_24, tags=cse:formknl} | |
| ││││││││└││││││││↱││… cse_23 = cse_12*cse_22 {id=insn_23, tags=cse:formknl} | |
| ││││││└│↱│││││││││││… cse_25 = cse_16*cse_24 {id=insn_25, tags=cse:formknl} | |
| ││││││↱│└││││││││└││… cse_26 = cse_23 + cse_25 {id=insn_26, tags=cse:formknl} | |
| ││││││││↱││││││││ ││… acc_i16_2 = 0 {id=sum_tmp_0_i16_2_init} | |
| │││││││││││││││││ ││… for i16_2 | |
| ││││││││└│││└││││↱││… acc_i16_2 = acc_i16_2 + cse_10[i16_2] {id=sum_tmp_0_i16_2_update} | |
| ││││││││ │││ │││││││… end i16_2 | |
| ││││││││↱│││ └│││└││… sum_tmp_0_0 = acc_i16_2 {id=sum_tmp_0_0, tags=formknl} | |
| ││││││││└│││↱↱└││↱││… cse_27 = sum_tmp_0_0 {id=insn_27, tags=cse:formknl} | |
| ││││││││ │││││ │││││… for i112_0, i16_1 | |
| │││││└└└↱│└└└│ └││└│… acc_i16_1 = acc_i16_1 + cnst[i16_1, i112_0]*cse_10[i16_1]*(cnst[i16_1, 2]*w_0_global[ltg_1[iel_inner + iel_outer*32, 2]] + cnst[i16_1, 0]*w_0_global[ltg_1[iel_inner + iel_outer*32, 0]] + cnst[i16_1, 1]*w_0_global[ltg_1[iel_inner + iel_outer*32, 1]]) {id=sum_tmp_i16_1_update} | |
| │││││ ││ │ ││ │… end i16_1 | |
| │││││↱ └└ └ └│ └… sum_tmp_0[i112_0] = acc_i16_1 {id=sum_tmp_3, tags=formknl} | |
| ││││││ │ … end i112_0 | |
| ││││││ │ … for ibf_gather_0 | |
| │└│││└ └ … A0_global[ltg_1[iel_inner + iel_outer*32, ibf_gather_0]] = A0_global[ltg_1[iel_inner + iel_outer*32, ibf_gather_0]] + cnst_2[ibf_gather_0]*(cse_20*cse_18 + cse_26*cse_24)*cse_27 + sum_tmp_0[ibf_gather_0] + cnst_1[ibf_gather_0]*(cse_20*cse_14 + cse_26*cse_22)*cse_27 {id=insn, tags=formknl, atomic=update[A0_global]seq_cst/auto} | |
| │ │││ … end ibf_gather_0 | |
| │ │││ … for ibf_gather_1 | |
| │ └└│ … A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_1], 0] = A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_1], 0] + sum_tmp_1_0[ibf_gather_1] {id=insn_0_1, tags=formknl, atomic=update[A1_global]seq_cst/auto} | |
| │ │ … end ibf_gather_1 | |
| │ │ … for ibf_gather_2 | |
| └ └ … A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_2], 1] = A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_2], 1] + sum_tmp_2_0[ibf_gather_2] {id=insn_1_0, tags=formknl, atomic=update[A1_global]seq_cst/auto} | |
| … end iel_inner, ibf_gather_2, iel_outer | |
| --------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment