Last active
January 5, 2018 05:54
-
-
Save kaushikcfd/ac2dd6d4a2cf49fd60f7e7e5d73918dc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=========================================================================== | |
failing kernel during pre-schedule check: | |
=========================================================================== | |
--------------------------------------------------------------------------- | |
KERNEL: loopy_kernel_and_loopy_kernel_and_loopy_kernel_and_tsfc_kernel_and_loopy_kernel | |
--------------------------------------------------------------------------- | |
ARGUMENTS: | |
A0_global: GlobalArg, type: np_atomic:dtype('float64'), shape: (A0_size), dim_tags: (N0:stride:1) | |
A0_size: ValueArg, type: np:dtype('int32') | |
A1_global: GlobalArg, type: np_atomic:dtype('float64'), shape: (A1_size, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
A1_size: ValueArg, type: np:dtype('int32') | |
coords_global: GlobalArg, type: np:dtype('float64'), shape: (coords_global_len, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
coords_global_len: ValueArg, type: np:dtype('int32') | |
ltg_0: GlobalArg, type: np:dtype('int32'), shape: (nelements, 3), dim_tags: (N1:stride:3, N0:stride:1) | |
ltg_1: GlobalArg, type: np:dtype('int32'), shape: (nelements, 3), dim_tags: (N1:stride:3, N0:stride:1) | |
nelements: ValueArg, type: np:dtype('int32') | |
w_0_global: GlobalArg, type: np:dtype('float64'), shape: (w_0_global_len), dim_tags: (N0:stride:1) | |
w_0_global_len: ValueArg, type: np:dtype('int32') | |
w_1_global: GlobalArg, type: np:dtype('float64'), shape: (w_1_global_len, 2), dim_tags: (N1:stride:2, N0:stride:1) | |
w_1_global_len: ValueArg, type: np:dtype('int32') | |
--------------------------------------------------------------------------- | |
DOMAINS: | |
[A0_size, A1_size, nelements] -> { [dim_init_1, ibf_gather_0, ibf_gather_1, ibf_gather_2, ibf_scat_0, idim_scat_0, ibf_scat_1, idim_scat_1, ibf_scat_2, idim_scat_2, i_init_0_outer, i_init_0_inner, i_init_1_outer, i_init_1_inner, iel_outer, iel_inner] : idim_scat_1 = 0 and 0 <= dim_init_1 <= 1 and 0 <= ibf_gather_0 <= 2 and 0 <= ibf_gather_1 <= 2 and 0 <= ibf_gather_2 <= 2 and 0 <= ibf_scat_0 <= 2 and 0 <= idim_scat_0 <= 1 and 0 <= ibf_scat_1 <= 2 and 0 <= ibf_scat_2 <= 2 and 0 <= idim_scat_2 <= 1 and i_init_0_inner >= 0 and -32i_init_0_outer <= i_init_0_inner <= 31 and i_init_0_inner < A0_size - 32i_init_0_outer and i_init_1_inner >= 0 and -32i_init_1_outer <= i_init_1_inner <= 31 and i_init_1_inner < A1_size - 32i_init_1_outer and iel_inner >= 0 and -32iel_outer <= iel_inner <= 31 and iel_inner < nelements - 32iel_outer } | |
{ [i16, i112, i113, i112_0, i113_0, i113_1, i16_0, i16_1, i16_2] : 0 <= i16 <= 2 and 0 <= i112 <= 2 and 0 <= i113 <= 2 and 0 <= i112_0 <= 2 and 0 <= i113_0 <= 2 and 0 <= i113_1 <= 2 and 0 <= i16_0 <= 2 and 0 <= i16_1 <= 2 and 0 <= i16_2 <= 2 } | |
--------------------------------------------------------------------------- | |
INAME IMPLEMENTATION TAGS: | |
dim_init_1: unr | |
i16: None | |
i16_0: None | |
i16_1: None | |
i16_2: None | |
i112: None | |
i112_0: None | |
i113: None | |
i113_0: None | |
i113_1: None | |
i_init_0_inner: l.0 | |
i_init_0_outer: g.0 | |
i_init_1_inner: l.0 | |
i_init_1_outer: g.0 | |
ibf_gather_0: None | |
ibf_gather_1: None | |
ibf_gather_2: None | |
ibf_scat_0: None | |
ibf_scat_1: None | |
ibf_scat_2: None | |
idim_scat_0: None | |
idim_scat_1: None | |
idim_scat_2: None | |
iel_inner: l.0 | |
iel_outer: g.0 | |
--------------------------------------------------------------------------- | |
TEMPORARIES: | |
acc_i16: type: np:dtype('float64'), shape: () scope:private | |
acc_i16_0: type: np:dtype('float64'), shape: () scope:private | |
acc_i16_1: type: np:dtype('float64'), shape: () scope:private | |
acc_i16_2: type: np:dtype('float64'), shape: () scope:private | |
cnst: type: np:dtype('float64'), shape: (3, 3), dim_tags: (N1:stride:3, N0:stride:1) scope:global | |
cnst_0: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
cnst_1: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
cnst_2: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:global | |
cse: type: np:dtype('float64'), shape: () scope:private | |
cse_0: type: np:dtype('float64'), shape: () scope:private | |
cse_1: type: np:dtype('float64'), shape: () scope:private | |
cse_2: type: np:dtype('float64'), shape: () scope:private | |
cse_3: type: np:dtype('float64'), shape: () scope:private | |
cse_4: type: np:dtype('float64'), shape: () scope:private | |
cse_5: type: np:dtype('float64'), shape: () scope:private | |
cse_6: type: np:dtype('float64'), shape: () scope:private | |
cse_7: type: np:dtype('float64'), shape: () scope:private | |
cse_8: type: np:dtype('float64'), shape: () scope:private | |
cse_9: type: np:dtype('float64'), shape: () scope:private | |
cse_10: type: np:dtype('float64'), shape: (3), dim_tags: (N0:stride:1) scope:private | |
cse_11: type: np:dtype('float64'), shape: () scope:private | |
cse_12: type: np:dtype('float64'), shape: () scope:private | |
cse_13: type: np:dtype('float64'), shape: () scope:private | |
cse_14: type: np:dtype('float64'), shape: () scope:private | |
cse_15: type: np:dtype('float64'), shape: () scope:private | |
cse_16: type: np:dtype('float64'), shape: () scope:private | |
cse_17: type: np:dtype('float64'), shape: () scope:private | |
cse_18: type: np:dtype('float64'), shape: () scope:private | |
cse_19: type: np:dtype('float64'), shape: () scope:private | |
cse_20: type: np:dtype('float64'), shape: () scope:private | |
cse_21: type: np:dtype('float64'), shape: () scope:private | |
cse_22: type: np:dtype('float64'), shape: () scope:private | |
cse_23: type: np:dtype('float64'), shape: () scope:private | |
cse_24: type: np:dtype('float64'), shape: () scope:private | |
cse_25: type: np:dtype('float64'), shape: () scope:private | |
cse_26: type: np:dtype('float64'), shape: () scope:private | |
cse_27: type: np:dtype('float64'), shape: () scope:private | |
sum_tmp_0: type: np:dtype('float64'), shape: (i112_0:3), dim_tags: (N0:stride:1) scope:private | |
sum_tmp_0_0: type: np:dtype('float64'), shape: () scope:private | |
sum_tmp_1_0: type: np:dtype('float64'), shape: (i113_0:3), dim_tags: (N0:stride:1) scope:private | |
sum_tmp_2_0: type: np:dtype('float64'), shape: (i113_1:3), dim_tags: (N0:stride:1) scope:private | |
--------------------------------------------------------------------------- | |
INSTRUCTIONS: | |
for i_init_0_inner, i_init_0_outer | |
↱↱ A0_global[i_init_0_inner + i_init_0_outer*32] = 0.0 {id=init_0, tags=init} | |
││ end i_init_0_inner, i_init_0_outer | |
││ for dim_init_1, i_init_1_inner, i_init_1_outer | |
││↱↱↱ A1_global[i_init_1_inner + i_init_1_outer*32, dim_init_1] = 0.0 {id=init_1, tags=init} | |
│││││ end dim_init_1, i_init_1_inner, i_init_1_outer | |
└│└││↱↱↱↱↱↱↱↱↱↱↱↱↱↱↱… ... gbarrier {id=gb1} | |
│ │││││││││││││││││… for iel_inner, iel_outer, i113_1 | |
↱│ ││└││││││││││││││… acc_i16 = 0 {id=sum_tmp_2_i16_init} | |
││ ││ ││││││││││││││… end i113_1 | |
││↱││↱└│││││││││││││… cse = (-1.0)*coords_global[ltg_0[iel_inner + iel_outer*32, 0], 0] {id=insn_0, tags=formknl:cse} | |
││└│││↱└││││││││││││… cse_0 = cse + coords_global[ltg_0[iel_inner + iel_outer*32, 1], 0] {id=insn_0_0, tags=formknl:cse} | |
││↱││││↱└│││││││││││… cse_1 = (-1.0)*coords_global[ltg_0[iel_inner + iel_outer*32, 0], 1] {id=insn_1, tags=formknl:cse} | |
││└│││││↱└││││││││││… cse_2 = cse_1 + coords_global[ltg_0[iel_inner + iel_outer*32, 2], 1] {id=insn_2, tags=formknl:cse} | |
││↱│││└│└ └│││││││││… cse_3 = cse_0*cse_2 {id=insn_3, tags=formknl:cse} | |
│││││└↱│↱ └││││││││… cse_4 = cse + coords_global[ltg_0[iel_inner + iel_outer*32, 2], 0] {id=insn_4, tags=formknl:cse} | |
│││││↱│└│↱ └│││││││… cse_5 = cse_1 + coords_global[ltg_0[iel_inner + iel_outer*32, 1], 1] {id=insn_5, tags=formknl:cse} | |
│││││└└↱││ └││││││… cse_6 = cse_4*cse_5 {id=insn_6, tags=formknl:cse} | |
│││││↱ └││ └│││││… cse_7 = (-1.0)*cse_6 {id=insn_7, tags=formknl:cse} | |
││└││└↱↱││ └││││… cse_8 = cse_3 + cse_7 {id=insn_8, tags=formknl:cse} | |
││↱││ └│││ └│││… cse_9 = abs(cse_8) {id=insn_9, tags=formknl:cse} | |
│││││ │││ │││… for i16 | |
││└││↱↱│││↱↱↱↱↱↱↱└││… cse_10[i16] = cnst_0[i16]*cse_9 {id=insn_10, tags=formknl:cse} | |
││ ││││││││││││││ ││… for i113_1 | |
└│↱││└│││││││││││ └│… acc_i16 = acc_i16 + cnst[i16, i113_1]*cse_10[i16]*(cnst[i16, 2]*w_1_global[ltg_0[iel_inner + iel_outer*32, 2], 1] + cnst[i16, 0]*w_1_global[ltg_0[iel_inner + iel_outer*32, 0], 1] + cnst[i16, 1]*w_1_global[ltg_0[iel_inner + iel_outer*32, 1], 1]) {id=sum_tmp_2_i16_update} | |
││││ │││││││││││ │… end i16 | |
↱│└││ └││││││││││ └… sum_tmp_2_0[i113_1] = acc_i16 {id=sum_tmp_2_0, tags=formknl} | |
││ ││ ││││││││││ … end i113_1 | |
││ ││ ││││││││││ … for i113_0 | |
││↱││ ││││││││││ … acc_i16_0 = 0 {id=sum_tmp_1_i16_0_init} | |
│││││ ││││││││││ … for i16_0 | |
││└││↱ │││└││││││ … acc_i16_0 = acc_i16_0 + cnst[i16_0, i113_0]*cse_10[i16_0]*(cnst[i16_0, 2]*w_1_global[ltg_0[iel_inner + iel_outer*32, 2], 0] + cnst[i16_0, 0]*w_1_global[ltg_0[iel_inner + iel_outer*32, 0], 0] + cnst[i16_0, 1]*w_1_global[ltg_0[iel_inner + iel_outer*32, 1], 0]) {id=sum_tmp_1_i16_0_update} | |
││ │││ │││ ││││││ … end i16_0 | |
││↱││└ │││ └│││││ … sum_tmp_1_0[i113_0] = acc_i16_0 {id=sum_tmp_1_0, tags=formknl} | |
│││││ │││ │││││ … end i113_0 | |
│││││ │││ │││││ … for i112_0 | |
│││││↱ │││ │││││ … acc_i16_1 = 0 {id=sum_tmp_i16_1_init} | |
││││││ │││ │││││ … end i112_0 | |
││││││↱└││↱↱│││││↱ … cse_13 = 1.0 / cse_8 {id=insn_13, tags=formknl:cse} | |
││││││└↱││││││││││↱↱… cse_14 = cse_2*cse_13 {id=insn_14, tags=formknl:cse} | |
││││││↱││└││││││││││… cse_17 = (-1.0)*cse_5 {id=insn_17, tags=formknl:cse} | |
││││││└││↱└│││││││││… cse_18 = cse_17*cse_13 {id=insn_18, tags=formknl:cse} | |
││││││↱│││↱│││││││││… cse_11 = (-1.0)*w_0_global[ltg_1[iel_inner + iel_outer*32, 0]] {id=insn_11, tags=formknl:cse} | |
││││││└│││││││││││││… cse_12 = cse_11 + w_0_global[ltg_1[iel_inner + iel_outer*32, 1]] {id=insn_12, tags=formknl:cse} | |
││││││↱└││││││││││││… cse_15 = cse_12*cse_14 {id=insn_15, tags=formknl:cse} | |
│││││││↱││└│││││││││… cse_16 = cse_11 + w_0_global[ltg_1[iel_inner + iel_outer*32, 2]] {id=insn_16, tags=formknl:cse} | |
│││││││└│└↱│││││││││… cse_19 = cse_16*cse_18 {id=insn_19, tags=formknl:cse} | |
││││││└↱│↱└│││││││││… cse_20 = cse_15 + cse_19 {id=insn_20, tags=formknl:cse} | |
││││││↱│└│ │││││││││… cse_21 = (-1.0)*cse_4 {id=insn_21, tags=formknl:cse} | |
││││││└│↱│↱└││││││││… cse_22 = cse_21*cse_13 {id=insn_22, tags=formknl:cse} | |
││││││↱││││↱│││││└││… cse_24 = cse_0*cse_13 {id=insn_24, tags=formknl:cse} | |
││││││││└││││││││↱││… cse_23 = cse_12*cse_22 {id=insn_23, tags=formknl:cse} | |
││││││└│↱│││││││││││… cse_25 = cse_16*cse_24 {id=insn_25, tags=formknl:cse} | |
││││││↱│└││││││││└││… cse_26 = cse_23 + cse_25 {id=insn_26, tags=formknl:cse} | |
││││││││↱││││││││ ││… acc_i16_2 = 0 {id=sum_tmp_0_i16_2_init} | |
│││││││││││││││││ ││… for i16_2 | |
││││││││└│││└││││↱││… acc_i16_2 = acc_i16_2 + cse_10[i16_2] {id=sum_tmp_0_i16_2_update} | |
││││││││ │││ │││││││… end i16_2 | |
││││││││↱│││ └│││└││… sum_tmp_0_0 = acc_i16_2 {id=sum_tmp_0_0, tags=formknl} | |
││││││││└│││↱↱└││↱││… cse_27 = sum_tmp_0_0 {id=insn_27, tags=formknl:cse} | |
││││││││ │││││ │││││… for i16_1, i112_0 | |
│││││└└└↱│└└└│ └││└│… acc_i16_1 = acc_i16_1 + cnst[i16_1, i112_0]*cse_10[i16_1]*(cnst[i16_1, 2]*w_0_global[ltg_1[iel_inner + iel_outer*32, 2]] + cnst[i16_1, 0]*w_0_global[ltg_1[iel_inner + iel_outer*32, 0]] + cnst[i16_1, 1]*w_0_global[ltg_1[iel_inner + iel_outer*32, 1]]) {id=sum_tmp_i16_1_update} | |
│││││ ││ │ ││ │… end i16_1 | |
│││││↱ └└ └ └│ └… sum_tmp_0[i112_0] = acc_i16_1 {id=sum_tmp_3, tags=formknl} | |
││││││ │ … end i112_0 | |
││││││ │ … for ibf_gather_0 | |
│└│││└ └ … A0_global[ltg_1[iel_inner + iel_outer*32, ibf_gather_0]] = A0_global[ltg_1[iel_inner + iel_outer*32, ibf_gather_0]] + cnst_2[ibf_gather_0]*(cse_20*cse_18 + cse_26*cse_24)*cse_27 + sum_tmp_0[ibf_gather_0] + cnst_1[ibf_gather_0]*(cse_20*cse_14 + cse_26*cse_22)*cse_27 {id=insn, tags=formknl, atomic=update[A0_global]seq_cst/auto} | |
│ │││ … end ibf_gather_0 | |
│ │││ … for ibf_gather_1 | |
│ └└│ … A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_1], 0] = A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_1], 0] + sum_tmp_1_0[ibf_gather_1] {id=insn_0_1, tags=formknl, atomic=update[A1_global]seq_cst/auto} | |
│ │ … end ibf_gather_1 | |
│ │ … for ibf_gather_2 | |
└ └ … A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_2], 1] = A1_global[ltg_0[iel_inner + iel_outer*32, ibf_gather_2], 1] + sum_tmp_2_0[ibf_gather_2] {id=insn_1_0, tags=formknl, atomic=update[A1_global]seq_cst/auto} | |
… end ibf_gather_2, iel_inner, iel_outer | |
--------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment