Skip to content

Instantly share code, notes, and snippets.

@inferrna
Created November 29, 2016 13:14
Show Gist options
  • Save inferrna/2c5a56c385e51b63366a96b9d29178f0 to your computer and use it in GitHub Desktop.
Save inferrna/2c5a56c385e51b63366a96b9d29178f0 to your computer and use it in GitHub Desktop.
/*
 * OpenCL counterpart of the LLVM type %"class.Eigen::array" ({ [1 x i32] })
 * declared in the IR later in this file: a wrapper around a one-element
 * int array.
 */
struct class_Eigen__array {
int f0[1];  /* the single stored int */
};
/*
 * OpenCL counterpart of %"struct.Eigen::internal::scalar_sqrt_op" ({ i8 }).
 * The functions in this file that receive a pointer to it never read f0.
 */
struct Eigen__internal__scalar_sqrt_op {
char f0;  /* single byte; copied by the kernel but not otherwise used here */
};
/*
 * OpenCL counterpart of %"struct.Eigen::DSizes" ({ %"class.Eigen::array" }):
 * wraps one class_Eigen__array.
 */
struct Eigen__DSizes {
struct class_Eigen__array f0;  /* f0.f0[0] is the value the kernel copies in */
};
/*
 * OpenCL counterpart of %"struct.Eigen::TensorEvaluator.3".
 * In this file it is the evaluator the coeff()/packet() readers load floats
 * from; the kernel points f0 at its eval_ptr1 buffer.
 */
struct Eigen__TensorEvaluator_3 {
global float* f0;                           /* buffer read as f0[index] .. f0[index + 3] */
struct Eigen__DSizes f1;                    /* copied from the _nopointers kernel argument */
global struct Eigen__GpuDevice* f2;         /* set to 0 by the kernel; never dereferenced in this file */
global struct class_Eigen__TensorMap_4* f3; /* set to 0 by the kernel; never dereferenced in this file */
};
/*
 * OpenCL counterpart of the packed LLVM type %"class.Eigen::TensorMap.4"
 * (<{ float*, DSizes, [4 x i8] }>). Only used here as the pointee type of
 * Eigen__TensorEvaluator_3.f3.
 */
struct class_Eigen__TensorMap_4 {
global float* f0;
struct Eigen__DSizes f1;
char f2[4];  /* mirrors the trailing [4 x i8] of the IR type (presumably padding) */
};
/*
 * OpenCL counterpart of %"struct.Eigen::TensorEvaluator.0".
 * In this file it is the evaluator that writePacket()/coeffRef() store
 * through; the kernel points f0 at its eval_ptr0 buffer.
 */
struct Eigen__TensorEvaluator_0 {
global float* f0;                         /* buffer addressed as f0 + index by the writers */
struct Eigen__DSizes f1;                  /* copied from the _nopointers kernel argument */
global struct Eigen__GpuDevice* f2;       /* set to 0 by the kernel; never dereferenced in this file */
global struct class_Eigen__TensorMap* f3; /* set to 0 by the kernel; never dereferenced in this file */
};
/*
 * OpenCL counterpart of %"struct.Eigen::TensorEvaluator.2": the sqrt functor
 * paired with the evaluator that supplies its input values.
 */
struct Eigen__TensorEvaluator_2 {
struct Eigen__internal__scalar_sqrt_op f0;  /* functor object passed to operator()/packetOp */
struct Eigen__TensorEvaluator_3 f1;         /* evaluator the input floats are read from */
};
/*
 * OpenCL counterpart of the packed LLVM type %"struct.Eigen::GpuDevice"
 * (<{ StreamInterface*, i32, [4 x i8] }>). Only referenced through pointers
 * that the kernel sets to 0; none of its fields are accessed in this file.
 */
struct Eigen__GpuDevice {
global struct class_Eigen__StreamInterface* f0;
int f1;
char f2[4];  /* mirrors the trailing [4 x i8] of the IR type (presumably padding) */
};
/*
 * OpenCL counterpart of the packed LLVM type %"class.Eigen::TensorMap"
 * (<{ float*, DSizes, [4 x i8] }>). Only used here as the pointee type of
 * Eigen__TensorEvaluator_0.f3.
 */
struct class_Eigen__TensorMap {
global float* f0;
struct Eigen__DSizes f1;
char f2[4];  /* mirrors the trailing [4 x i8] of the IR type (presumably padding) */
};
/*
 * Placeholder for %"class.Eigen::StreamInterface". The IR type has one
 * member of type i32 (...)** which is not reproduced here; the struct is
 * only used as the pointee type of Eigen__GpuDevice.f0.
 * NOTE(review): an empty struct is not valid ISO C99 and relies on a
 * compiler extension - confirm the target OpenCL compiler accepts it.
 */
struct class_Eigen__StreamInterface {
};
/*
 * OpenCL counterpart of %"struct.Eigen::TensorEvaluator": the top-level
 * evaluator that evalPacket()/evalScalar() operate on. Results computed
 * from f1 are stored through f0.
 */
struct Eigen__TensorEvaluator {
struct Eigen__TensorEvaluator_0 f0;  /* side that is written */
struct Eigen__TensorEvaluator_2 f1;  /* side that is read and passed through sqrt */
};
/*
 * "_nopointers" twin of class_Eigen__array, used inside the struct the
 * kernel receives as its first argument. Same single-element int array.
 */
struct class_Eigen__array_nopointers {
int f0[1];
};
/* "_nopointers" twin of Eigen__DSizes: wraps one class_Eigen__array_nopointers. */
struct Eigen__DSizes_nopointers {
struct class_Eigen__array_nopointers f0;
};
/*
 * "_nopointers" twin of Eigen__TensorEvaluator_0: keeps only the DSizes
 * member; the three pointer members of the full struct are absent.
 */
struct Eigen__TensorEvaluator_0_nopointers {
struct Eigen__DSizes_nopointers f0;  /* corresponds to Eigen__TensorEvaluator_0.f1 in the kernel's copy */
};
/* "_nopointers" twin of Eigen__internal__scalar_sqrt_op: the same single byte. */
struct Eigen__internal__scalar_sqrt_op_nopointers {
char f0;
};
/*
 * "_nopointers" twin of Eigen__TensorEvaluator_3: keeps only the DSizes
 * member; the three pointer members of the full struct are absent.
 */
struct Eigen__TensorEvaluator_3_nopointers {
struct Eigen__DSizes_nopointers f0;  /* corresponds to Eigen__TensorEvaluator_3.f1 in the kernel's copy */
};
/* "_nopointers" twin of Eigen__TensorEvaluator_2: functor plus pointer-free input evaluator. */
struct Eigen__TensorEvaluator_2_nopointers {
struct Eigen__internal__scalar_sqrt_op_nopointers f0;
struct Eigen__TensorEvaluator_3_nopointers f1;
};
/*
 * Type of the kernel's first argument: the top-level evaluator with every
 * pointer member removed. The kernel copies its fields into a full
 * Eigen__TensorEvaluator and fills the two float pointers from separate
 * kernel arguments.
 */
struct Eigen__TensorEvaluator_nopointers {
struct Eigen__TensorEvaluator_0_nopointers f0;
struct Eigen__TensorEvaluator_2_nopointers f1;
};
/*
 * Forward declarations for every function defined below, so definitions may
 * appear in any order. Names are the Itanium-mangled C++ symbols of the
 * original Eigen functions (pstore carries an extra "_gp" suffix).
 * Every function takes a trailing `local int *scratch` argument; the
 * definitions in this file only forward it to their callees.
 */

/* Readers: scalar coeff() and 4-wide packet() for the sqrt expression and its input map. */
float _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi(struct Eigen__TensorEvaluator_2* this, int index, local int *scratch);
float _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(struct Eigen__TensorEvaluator_3* this, int index, local int *scratch);
float _ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf(struct Eigen__internal__scalar_sqrt_op* this, float* a, local int *scratch);
float4 _ZN5Eigen8internal5psqrtI6float4EET_RKS3_(float4* a, local int *scratch);
float4 _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i(struct Eigen__TensorEvaluator_2* this, int index, local int *scratch);
float4 _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(struct Eigen__TensorEvaluator_3* this, int index, local int *scratch);
float4 _ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_(struct Eigen__internal__scalar_sqrt_op* this, float4* a, local int *scratch);
/* Writers and the kernel entry point. */
global float* _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(struct Eigen__TensorEvaluator_0* this, int index, local int *scratch);
kernel void _ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_(global struct Eigen__TensorEvaluator_nopointers* eval_nopointers, global float* eval_ptr0, uint eval_ptr0_offset, global float* eval_ptr1, uint eval_ptr1_offset, int size, local int *scratch);
void _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi(struct Eigen__TensorEvaluator* this, int i, local int *scratch);
void _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi(struct Eigen__TensorEvaluator* this, int i, local int *scratch);
void _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(struct Eigen__TensorEvaluator_0* this, int index, float4* x, local int *scratch);
void _ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0__gp(global float* to, float4* from, local int *scratch);
/*
 * Kernel entry point (Eigen EigenMetaKernel for "dst = sqrt(src)").
 *
 * eval_nopointers  pointer-free image of the evaluator; its fields are
 *                  copied into a function-local Eigen__TensorEvaluator.
 * eval_ptr0        buffer stored into the evaluator's written side (f0.f0).
 * eval_ptr1        buffer stored into the evaluator's read side (f1.f1.f0).
 * eval_ptr*_offset added to the matching float pointer before use.
 * size             number of elements to process.
 * scratch          forwarded unchanged to evalPacket/evalScalar.
 *
 * Each work-item first handles 4-element packets starting at
 * first_index * 4 and advancing by 4 * step_size, then handles the
 * remaining tail elements one at a time starting at
 * vectorized_size + first_index and advancing by step_size.
 */
kernel void _ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_(global struct Eigen__TensorEvaluator_nopointers* eval_nopointers, global float* eval_ptr0, uint eval_ptr0_offset, global float* eval_ptr1, uint eval_ptr1_offset, int size, local int *scratch) {
    /* Shift both buffers by their caller-supplied offsets. */
    eval_ptr1 += eval_ptr1_offset;
    eval_ptr0 += eval_ptr0_offset;

    /* Rebuild the full evaluator: non-pointer fields come from the
     * argument struct, the float pointers from the separate arguments,
     * and the device/map pointers are null. */
    struct Eigen__TensorEvaluator evaluator;
    evaluator.f0.f0 = eval_ptr0;
    evaluator.f0.f1.f0.f0[0] = eval_nopointers->f0.f0.f0.f0[0];
    evaluator.f0.f2 = 0;
    evaluator.f0.f3 = 0;
    evaluator.f1.f0.f0 = eval_nopointers->f1.f0.f0;
    evaluator.f1.f1.f0 = eval_ptr1;
    evaluator.f1.f1.f1.f0.f0[0] = eval_nopointers->f1.f1.f0.f0.f0[0];
    evaluator.f1.f1.f2 = 0;
    evaluator.f1.f1.f3 = 0;

    /* Work-item identity, narrowed to int before any arithmetic. */
    const int group_id = get_group_id(0);
    const int group_size = get_local_size(0);
    const int lane_id = get_local_id(0);
    const int group_count = get_num_groups(0);

    const int first_index = (group_id * group_size) + lane_id;
    const int step_size = group_size * group_count;

    /* Largest multiple of 4 not exceeding size, and the packet-loop stride. */
    const int vectorized_size = (size / 4) * 4;
    const int vectorized_step_size = step_size * 4;

    /* Packet phase: four elements per call. */
    for (int i = first_index * 4; i < vectorized_size; i += vectorized_step_size) {
        _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi(&evaluator, i, scratch);
    }

    /* Scalar phase: leftover elements past vectorized_size. */
    for (int i = vectorized_size + first_index; i < size; i += step_size) {
        _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi(&evaluator, i, scratch);
    }
}
/*
 * evalPacket: computes the 4-float packet of the sqrt expression at
 * element index i (via this->f1) and writes it to the same index of the
 * written side (this->f0).
 *
 * this     top-level evaluator.
 * i        index of the first of the four elements.
 * scratch  forwarded unchanged to both callees.
 */
void _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi(struct Eigen__TensorEvaluator* this, int i, local int *scratch) {
    /* Evaluate the right-hand side first, into a function-local packet. */
    float4 rhs_packet = _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i(&this->f1, i, scratch);

    /* Then hand its address to the writer. */
    _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(&this->f0, i, &rhs_packet, scratch);
}
/*
 * writePacket: stores the packet *x at this->f0 + index by delegating to
 * pstore.
 *
 * this     evaluator whose f0 buffer is written.
 * index    element offset of the first float to write.
 * x        packet to store (read by pstore).
 * scratch  forwarded unchanged to pstore.
 */
void _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(struct Eigen__TensorEvaluator_0* this, int index, float4* x, local int *scratch) {
    global float* destination = this->f0 + index;
    _ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0__gp(destination, x, scratch);
}
/*
 * coeffRef: returns the address of element `index` in this->f0.
 * No bounds check is performed; `scratch` is unused.
 */
global float* _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(struct Eigen__TensorEvaluator_0* this, int index, local int *scratch) {
    global float* base = this->f0;
    return base + index;
}
/*
 * coeff (unary sqrt expression): reads element `index` from the input
 * evaluator this->f1 and returns the result of applying the functor
 * this->f0 to it.
 *
 * scratch is forwarded unchanged to both callees.
 */
float _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi(struct Eigen__TensorEvaluator_2* this, int index, local int *scratch) {
    /* The functor takes its operand by pointer, so keep it in a local. */
    float operand = _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(&this->f1, index, scratch);
    return _ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf(&this->f0, &operand, scratch);
}
/*
 * packet (unary sqrt expression): loads the 4-float packet starting at
 * `index` from the input evaluator this->f1 and returns the result of
 * the functor's packetOp on it.
 *
 * scratch is forwarded unchanged to both callees.
 */
float4 _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i(struct Eigen__TensorEvaluator_2* this, int index, local int *scratch) {
    /* packetOp takes its operand by pointer, so keep it in a local. */
    float4 operand = _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(&this->f1, index, scratch);
    return _ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_(&this->f0, &operand, scratch);
}
/*
 * evalScalar: evaluates the sqrt expression (this->f1) at element i and
 * stores the result at element i of the written side (this->f0).
 *
 * scratch is forwarded unchanged to both callees.
 */
void _ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi(struct Eigen__TensorEvaluator* this, int i, local int *scratch) {
    /* Compute the value before resolving the slot it goes into. */
    const float value = _ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi(&this->f1, i, scratch);
    global float* slot = _ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(&this->f0, i, scratch);
    *slot = value;
}
/*
 * pstore<float, float4>: writes the four floats of *from to to[0..3].
 *
 * to       destination; must have room for four floats.
 * from     packet to store; only read.
 * scratch  unused.
 *
 * The previous body copied the 16 bytes through int* casts of both
 * pointers, i.e. it accessed float data through an int lvalue (type
 * punning that the C99 effective-type rules do not permit) and kept an
 * unused float4* alias of `to`. vstore4 performs the same four-float
 * store without any casts and, like the old int-wise copy, only needs
 * `to` to be aligned for a single float.
 */
void _ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0__gp(global float* to, float4* from, local int *scratch) {
    /* Offset 0: store at to + 0 * 4, i.e. exactly at `to`. */
    vstore4(*from, 0, to);
}
/*
 * coeff (tensor map): returns element `index` of this->f0.
 * No bounds check is performed; `scratch` is unused.
 */
float _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(struct Eigen__TensorEvaluator_3* this, int index, local int *scratch) {
    global float* element = this->f0 + index;
    return *element;
}
/*
 * packet (tensor map): reads the four consecutive floats
 * this->f0[index] .. this->f0[index + 3] with scalar loads and returns
 * them as one float4, in that order.
 * No bounds check is performed; `scratch` is unused.
 */
float4 _ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(struct Eigen__TensorEvaluator_3* this, int index, local int *scratch) {
    global float* first = this->f0 + index;

    /* Four independent scalar loads, lowest index first. */
    const float lane0 = first[0];
    const float lane1 = first[1];
    const float lane2 = first[2];
    const float lane3 = first[3];

    return (float4)(lane0, lane1, lane2, lane3);
}
/*
 * scalar_sqrt_op::packetOp: returns psqrt(*a). The functor object `this`
 * is not read; `scratch` is forwarded unchanged to psqrt.
 */
float4 _ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_(struct Eigen__internal__scalar_sqrt_op* this, float4* a, local int *scratch) {
    const float4 roots = _ZN5Eigen8internal5psqrtI6float4EET_RKS3_(a, scratch);
    return roots;
}
/*
 * scalar_sqrt_op::operator(): returns sqrt(*a). The functor object `this`
 * and `scratch` are not used.
 */
float _ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf(struct Eigen__internal__scalar_sqrt_op* this, float* a, local int *scratch) {
    const float operand = *a;
    const float root = sqrt(operand);
    return (float)root;
}
/*
 * psqrt<float4>: returns a float4 whose four components are the scalar
 * sqrt of the corresponding components of *a. `scratch` is unused.
 */
float4 _ZN5Eigen8internal5psqrtI6float4EET_RKS3_(float4* a, local int *scratch) {
    /* Scalar sqrt per component, as in the generated code. */
    const float root_x = sqrt(a->x);
    const float root_y = sqrt(a->y);
    const float root_z = sqrt(a->z);
    const float root_w = sqrt(a->w);

    return (float4)(root_x, root_y, root_z, root_w);
}
; ModuleID = 'test/tf/samples/cwise_op_gpu_sqrt-device-noopt.ll'
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; Named struct types of the module.
; The four __cuda_builtin_* types are one-byte structs used as the types of
; the @blockIdx/@blockDim/@threadIdx/@gridDim globals declared below.
%struct.__cuda_builtin_blockIdx_t = type { i8 }
%struct.__cuda_builtin_blockDim_t = type { i8 }
%struct.__cuda_builtin_threadIdx_t = type { i8 }
%struct.__cuda_builtin_gridDim_t = type { i8 }
; Evaluator types of the sqrt kernel. Each has a same-named struct in the
; OpenCL source earlier in this file ('.' and '::' become '_' there).
%"struct.Eigen::TensorEvaluator" = type { %"struct.Eigen::TensorEvaluator.0", %"struct.Eigen::TensorEvaluator.2" }
%"struct.Eigen::TensorEvaluator.0" = type { float*, %"struct.Eigen::DSizes", %"struct.Eigen::GpuDevice"*, %"class.Eigen::TensorMap"* }
%"struct.Eigen::DSizes" = type { %"class.Eigen::array" }
%"class.Eigen::array" = type { [1 x i32] }
; <{ ... }> denotes a packed struct; the trailing [4 x i8] is an explicit member.
%"struct.Eigen::GpuDevice" = type <{ %"class.Eigen::StreamInterface"*, i32, [4 x i8] }>
%"class.Eigen::StreamInterface" = type { i32 (...)** }
%"class.Eigen::TensorMap" = type <{ float*, %"struct.Eigen::DSizes", [4 x i8] }>
%"struct.Eigen::TensorEvaluator.2" = type { %"struct.Eigen::internal::scalar_sqrt_op", %"struct.Eigen::TensorEvaluator.3" }
%"struct.Eigen::internal::scalar_sqrt_op" = type { i8 }
%"struct.Eigen::TensorEvaluator.3" = type { float*, %"struct.Eigen::DSizes", %"struct.Eigen::GpuDevice"*, %"class.Eigen::TensorMap.4"* }
%"class.Eigen::TensorMap.4" = type <{ float*, %"struct.Eigen::DSizes", [4 x i8] }>
; float4 is a plain struct of four floats in the IR (not a vector type).
%struct.float4 = type { float, float, float, float }
; Types used by the scalar_sqrt_gradient_op kernel whose comdats are listed
; below; they use a 64-bit size array ([1 x i64]) for the third operand.
%"struct.Eigen::TensorEvaluator.7" = type { %"struct.Eigen::TensorEvaluator.0", %"struct.Eigen::TensorEvaluator.8" }
%"struct.Eigen::TensorEvaluator.8" = type { %"struct.Eigen::internal::scalar_sqrt_gradient_op", %"struct.Eigen::TensorEvaluator.3", %"struct.Eigen::TensorEvaluator.9" }
%"struct.Eigen::internal::scalar_sqrt_gradient_op" = type { i8 }
%"struct.Eigen::TensorEvaluator.9" = type { float*, %"struct.Eigen::DSizes.10", %"struct.Eigen::GpuDevice"*, %"class.Eigen::TensorMap.12"* }
%"struct.Eigen::DSizes.10" = type { %"class.Eigen::array.11" }
%"class.Eigen::array.11" = type { [1 x i64] }
%"class.Eigen::TensorMap.12" = type { float*, %"struct.Eigen::DSizes.10" }
$_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_ = comdat any
$_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi = comdat any
$_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi = comdat any
$_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4 = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i = comdat any
$_ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0_ = comdat any
$_ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_ = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i = comdat any
$_ZN5Eigen8internal5psqrtI6float4EET_RKS3_ = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi = comdat any
$_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi = comdat any
$_ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi = comdat any
$_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElEEvT_T0_ = comdat any
$_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEl = comdat any
$_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEl = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE6packetILi16EEE6float4l = comdat any
$_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfE8packetOpI6float4EEKT_RS6_S7_ = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4l = comdat any
$_ZN5Eigen8internal5pset1I6float4EET_RKNS0_15unpacket_traitsIS3_E4typeE = comdat any
$_ZN5Eigen8internal5pconjI6float4EET_RKS3_ = comdat any
$_ZN5Eigen8internal4pdivI6float4EET_RKS3_S5_ = comdat any
$_ZN5Eigen8internal4pmulI6float4EET_RKS3_S5_ = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE5coeffEl = comdat any
$_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfEclERKfS4_ = comdat any
$_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEl = comdat any
$_ZN5Eigen6numext4conjIfEENS_8internal11conj_retvalINS2_36global_math_functions_filtering_baseIT_vE4typeEE4typeERKS5_ = comdat any
$_ZN5Eigen8internal9conj_implIfLb0EE3runERKf = comdat any
@.str = private unnamed_addr constant [5 x i8] c"NONE\00", align 1
@blockIdx = extern_weak addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1
@blockDim = extern_weak addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1
@threadIdx = extern_weak addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1
@gridDim = extern_weak addrspace(1) global %struct.__cuda_builtin_gridDim_t, align 1
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @_ZL21__nvvm_reflect_anchorv to i8*)], section "llvm.metadata"
; Function Attrs: nounwind
; Internal helper that calls @__nvvm_reflect with a pointer to the "NONE"
; string constant @.str and returns the result. It is listed in @llvm.used
; above, which keeps it in the module although nothing else here calls it.
define internal i32 @_ZL21__nvvm_reflect_anchorv() #0 {
%1 = call i32 @__nvvm_reflect(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0)) #3
ret i32 %1
}
; Function Attrs: nounwind readnone
declare i32 @__nvvm_reflect(i8*) #1
; EigenMetaKernel for the sqrt assignment, unoptimized.
;   %eval  evaluator, passed byval (the callee gets its own copy).
;   %size  number of elements to process.
; Computes first_index = ctaid.x * ntid.x + tid.x and
; step_size = ntid.x * nctaid.x, then runs two loops: a packet loop over
; [first_index * 4, (size / 4) * 4) with stride step_size * 4 calling
; evalPacket, and a scalar loop over [(size / 4) * 4 + first_index, size)
; with stride step_size calling evalScalar.
define weak_odr void @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_(%"struct.Eigen::TensorEvaluator"* byval align 8 %eval, i32 %size) #2 comdat {
; Stack slots. %1-%4 and the *.i names hold the arguments and locals of the
; inlined EigenMetaKernelEval::run (see the .exit label at the end).
%1 = alloca %"struct.Eigen::TensorEvaluator"*, align 8
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
%PacketSize.i = alloca i32, align 4
%vectorized_size.i = alloca i32, align 4
%vectorized_step_size.i = alloca i32, align 4
%i.i = alloca i32, align 4
%i1.i = alloca i32, align 4
%5 = alloca i32, align 4
%first_index = alloca i32, align 4
%step_size = alloca i32, align 4
%vectorizable = alloca i8, align 1
store i32 %size, i32* %5, align 4
; first_index = ctaid.x * ntid.x + tid.x
%6 = call i32 @llvm.ptx.read.ctaid.x() #7
%7 = call i32 @llvm.ptx.read.ntid.x() #7
%8 = mul i32 %6, %7
%9 = call i32 @llvm.ptx.read.tid.x() #7
%10 = add i32 %8, %9
store i32 %10, i32* %first_index, align 4
; step_size = ntid.x * nctaid.x
%11 = call i32 @llvm.ptx.read.ntid.x() #7
%12 = call i32 @llvm.ptx.read.nctaid.x() #7
%13 = mul i32 %11, %12
store i32 %13, i32* %step_size, align 4
; vectorizable is stored as 1 and never read in this function.
store i8 1, i8* %vectorizable, align 1
%14 = load i32, i32* %first_index, align 4
%15 = load i32, i32* %5, align 4
%16 = load i32, i32* %step_size, align 4
; Argument copies for the inlined run(): %1 = &eval, %2 = first_index,
; %3 = size, %4 = step_size.
store %"struct.Eigen::TensorEvaluator"* %eval, %"struct.Eigen::TensorEvaluator"** %1, align 8
store i32 %14, i32* %2, align 4
store i32 %15, i32* %3, align 4
store i32 %16, i32* %4, align 4
store i32 4, i32* %PacketSize.i, align 4
; vectorized_size = (size / 4) * 4
%17 = load i32, i32* %3, align 4
%18 = sdiv i32 %17, 4
%19 = mul nsw i32 %18, 4
store i32 %19, i32* %vectorized_size.i, align 4
; vectorized_step_size = step_size * 4
%20 = load i32, i32* %4, align 4
%21 = mul nsw i32 %20, 4
store i32 %21, i32* %vectorized_step_size.i, align 4
; i = first_index * 4
%22 = load i32, i32* %2, align 4
%23 = mul nsw i32 %22, 4
store i32 %23, i32* %i.i, align 4
br label %24
; Packet loop header: continue while i < vectorized_size.
; <label>:24 ; preds = %28, %0
%25 = load i32, i32* %i.i, align 4
%26 = load i32, i32* %vectorized_size.i, align 4
%27 = icmp slt i32 %25, %26
br i1 %27, label %28, label %34
; Packet loop body: evalPacket(eval, i); i += vectorized_step_size.
; <label>:28 ; preds = %24
%29 = load %"struct.Eigen::TensorEvaluator"*, %"struct.Eigen::TensorEvaluator"** %1, align 8
%30 = load i32, i32* %i.i, align 4
call void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi(%"struct.Eigen::TensorEvaluator"* %29, i32 %30)
%31 = load i32, i32* %vectorized_step_size.i, align 4
%32 = load i32, i32* %i.i, align 4
%33 = add nsw i32 %32, %31
store i32 %33, i32* %i.i, align 4
br label %24
; Scalar loop init: i1 = vectorized_size + first_index.
; <label>:34 ; preds = %24
%35 = load i32, i32* %vectorized_size.i, align 4
%36 = load i32, i32* %2, align 4
%37 = add nsw i32 %35, %36
store i32 %37, i32* %i1.i, align 4
br label %38
; Scalar loop header: continue while i1 < size.
; <label>:38 ; preds = %42, %34
%39 = load i32, i32* %i1.i, align 4
%40 = load i32, i32* %3, align 4
%41 = icmp slt i32 %39, %40
br i1 %41, label %42, label %_ZN5Eigen8internal19EigenMetaKernelEvalINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiLb1EE3runERSL_iii.exit
; Scalar loop body: evalScalar(eval, i1); i1 += step_size.
; <label>:42 ; preds = %38
%43 = load %"struct.Eigen::TensorEvaluator"*, %"struct.Eigen::TensorEvaluator"** %1, align 8
%44 = load i32, i32* %i1.i, align 4
call void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi(%"struct.Eigen::TensorEvaluator"* %43, i32 %44)
%45 = load i32, i32* %4, align 4
%46 = load i32, i32* %i1.i, align 4
%47 = add nsw i32 %46, %45
store i32 %47, i32* %i1.i, align 4
br label %38
_ZN5Eigen8internal19EigenMetaKernelEvalINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiLb1EE3runERSL_iii.exit: ; preds = %38
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.ctaid.x() #3
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.ntid.x() #3
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.tid.x() #3
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.nctaid.x() #3
; Function Attrs: inlinehint
; evalPacket(this, i): calls packet() on field 1 of *this with index i,
; spills the returned %struct.float4 member by member into the stack slot
; %3, then calls writePacket() on field 0 of *this with the same index and
; a pointer to that slot.
define linkonce_odr void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEi(%"struct.Eigen::TensorEvaluator"* %this, i32 %i) #4 comdat align 2 {
%1 = alloca %"struct.Eigen::TensorEvaluator"*, align 8
%2 = alloca i32, align 4
%LhsStoreMode = alloca i32, align 4
%RhsLoadMode = alloca i32, align 4
%3 = alloca %struct.float4, align 4
store %"struct.Eigen::TensorEvaluator"* %this, %"struct.Eigen::TensorEvaluator"** %1, align 8
store i32 %i, i32* %2, align 4
%4 = load %"struct.Eigen::TensorEvaluator"*, %"struct.Eigen::TensorEvaluator"** %1, align 8
; Both mode slots are set to 16 and not read again in this function.
store i32 16, i32* %LhsStoreMode, align 4
store i32 16, i32* %RhsLoadMode, align 4
; %5 = &this->field0 (written side), %7 = &this->field1 (read side).
%5 = getelementptr inbounds %"struct.Eigen::TensorEvaluator", %"struct.Eigen::TensorEvaluator"* %4, i32 0, i32 0
%6 = load i32, i32* %2, align 4
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator", %"struct.Eigen::TensorEvaluator"* %4, i32 0, i32 1
%8 = load i32, i32* %2, align 4
%9 = call %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i(%"struct.Eigen::TensorEvaluator.2"* %7, i32 %8)
; Copy the four members of the returned aggregate into %3.
%10 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 0
%11 = extractvalue %struct.float4 %9, 0
store float %11, float* %10, align 4
%12 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 1
%13 = extractvalue %struct.float4 %9, 1
store float %13, float* %12, align 4
%14 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 2
%15 = extractvalue %struct.float4 %9, 2
store float %15, float* %14, align 4
%16 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 3
%17 = extractvalue %struct.float4 %9, 3
store float %17, float* %16, align 4
call void @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(%"struct.Eigen::TensorEvaluator.0"* %5, i32 %6, %struct.float4* dereferenceable(16) %3)
ret void
}
; Function Attrs: inlinehint
; evalScalar(this, i): calls coeff() on field 1 of *this with index i, then
; coeffRef() on field 0 of *this with the same index, and stores the float
; returned by coeff() through the pointer returned by coeffRef().
define linkonce_odr void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEi(%"struct.Eigen::TensorEvaluator"* %this, i32 %i) #4 comdat align 2 {
%1 = alloca %"struct.Eigen::TensorEvaluator"*, align 8
%2 = alloca i32, align 4
store %"struct.Eigen::TensorEvaluator"* %this, %"struct.Eigen::TensorEvaluator"** %1, align 8
store i32 %i, i32* %2, align 4
%3 = load %"struct.Eigen::TensorEvaluator"*, %"struct.Eigen::TensorEvaluator"** %1, align 8
; Value to store: coeff(&this->field1, i).
%4 = getelementptr inbounds %"struct.Eigen::TensorEvaluator", %"struct.Eigen::TensorEvaluator"* %3, i32 0, i32 1
%5 = load i32, i32* %2, align 4
%6 = call float @_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi(%"struct.Eigen::TensorEvaluator.2"* %4, i32 %5)
; Destination: coeffRef(&this->field0, i).
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator", %"struct.Eigen::TensorEvaluator"* %3, i32 0, i32 0
%8 = load i32, i32* %2, align 4
%9 = call dereferenceable(4) float* @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(%"struct.Eigen::TensorEvaluator.0"* %7, i32 %8)
store float %6, float* %9, align 4
ret void
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<TensorMap<Tensor<float,1,1,int>,16,MakePointer>,
;   GpuDevice>::writePacket<16>(int index, const float4& x)
;
; Computes the address of element %index in the evaluator's data buffer
; (struct field 0, a float*) and hands it, together with the packet reference
; %x, to Eigen::internal::pstore<float, float4>.
define linkonce_odr void @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(%"struct.Eigen::TensorEvaluator.0"* %this, i32 %index, %struct.float4* dereferenceable(16) %x) #4 comdat align 2 {
  ; Slots holding the two arguments forwarded to pstore.
  %dst.slot = alloca float*, align 8
  %src.slot = alloca %struct.float4*, align 8
  ; Slots for this function's own arguments.
  %this.addr = alloca %"struct.Eigen::TensorEvaluator.0"*, align 8
  %index.addr = alloca i32, align 4
  %x.addr = alloca %struct.float4*, align 8
  store %"struct.Eigen::TensorEvaluator.0"* %this, %"struct.Eigen::TensorEvaluator.0"** %this.addr, align 8
  store i32 %index, i32* %index.addr, align 4
  store %struct.float4* %x, %struct.float4** %x.addr, align 8

  ; elem.ptr = this->data + (sign-extended) index
  %self = load %"struct.Eigen::TensorEvaluator.0"*, %"struct.Eigen::TensorEvaluator.0"** %this.addr, align 8
  %data.field = getelementptr inbounds %"struct.Eigen::TensorEvaluator.0", %"struct.Eigen::TensorEvaluator.0"* %self, i32 0, i32 0
  %data = load float*, float** %data.field, align 8
  %idx = load i32, i32* %index.addr, align 4
  %idx.wide = sext i32 %idx to i64
  %elem.ptr = getelementptr inbounds float, float* %data, i64 %idx.wide

  ; Route destination and source through their slots, then store the packet.
  %packet.ref = load %struct.float4*, %struct.float4** %x.addr, align 8
  store float* %elem.ptr, float** %dst.slot, align 8
  store %struct.float4* %packet.ref, %struct.float4** %src.slot, align 8
  %dst = load float*, float** %dst.slot, align 8
  %src = load %struct.float4*, %struct.float4** %src.slot, align 8
  call void @_ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0_(float* %dst, %struct.float4* dereferenceable(16) %src)
  ret void
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorCwiseUnaryOp<scalar_sqrt_op<float>,
;     const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>>,
;   GpuDevice>::packet<16>(int index) const
;
; Fetches a float4 at %index from the argument evaluator (struct field 1),
; passes it to the functor's packetOp (struct field 0) and returns the
; resulting float4 by value.
define linkonce_odr %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE6packetILi16EEE6float4i(%"struct.Eigen::TensorEvaluator.2"* %this, i32 %index) #4 comdat align 2 {
; %1: return-value slot, %2/%3: argument slots, %4: temporary for the loaded packet.
%1 = alloca %struct.float4, align 4
%2 = alloca %"struct.Eigen::TensorEvaluator.2"*, align 8
%3 = alloca i32, align 4
%4 = alloca %struct.float4, align 4
store %"struct.Eigen::TensorEvaluator.2"* %this, %"struct.Eigen::TensorEvaluator.2"** %2, align 8
store i32 %index, i32* %3, align 4
%5 = load %"struct.Eigen::TensorEvaluator.2"*, %"struct.Eigen::TensorEvaluator.2"** %2, align 8
; %6 = &this->functor (field 0), %7 = &this->argument evaluator (field 1).
%6 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.2", %"struct.Eigen::TensorEvaluator.2"* %5, i32 0, i32 0
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.2", %"struct.Eigen::TensorEvaluator.2"* %5, i32 0, i32 1
%8 = load i32, i32* %3, align 4
; Load the input packet from the argument evaluator.
%9 = call %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(%"struct.Eigen::TensorEvaluator.3"* %7, i32 %8)
; Unpack the returned aggregate field by field into the temporary %4.
%10 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 0
%11 = extractvalue %struct.float4 %9, 0
store float %11, float* %10, align 4
%12 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 1
%13 = extractvalue %struct.float4 %9, 1
store float %13, float* %12, align 4
%14 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 2
%15 = extractvalue %struct.float4 %9, 2
store float %15, float* %14, align 4
%16 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 3
%17 = extractvalue %struct.float4 %9, 3
store float %17, float* %16, align 4
; Apply scalar_sqrt_op::packetOp to the temporary.
%18 = call %struct.float4 @_ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_(%"struct.Eigen::internal::scalar_sqrt_op"* %6, %struct.float4* dereferenceable(16) %4)
; Unpack the functor result into the return slot %1.
%19 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%20 = extractvalue %struct.float4 %18, 0
store float %20, float* %19, align 4
%21 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%22 = extractvalue %struct.float4 %18, 1
store float %22, float* %21, align 4
%23 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%24 = extractvalue %struct.float4 %18, 2
store float %24, float* %23, align 4
%25 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%26 = extractvalue %struct.float4 %18, 3
store float %26, float* %25, align 4
%27 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %27
}
; Function Attrs: inlinehint nounwind
; Eigen::internal::pstore<float, float4>(float* to, const float4& from)
;
; Copies the 16 bytes of the float4 referenced by %from to the memory at %to
; with a single llvm.memcpy (alignment operand 4, non-volatile).
define linkonce_odr void @_ZN5Eigen8internal6pstoreIf6float4EEvPT_RKT0_(float* %to, %struct.float4* dereferenceable(16) %from) #5 comdat {
  %to.addr = alloca float*, align 8
  %from.addr = alloca %struct.float4*, align 8
  store float* %to, float** %to.addr, align 8
  store %struct.float4* %from, %struct.float4** %from.addr, align 8

  ; Reload both pointers from their slots.
  %dst.f32 = load float*, float** %to.addr, align 8
  %src.f4 = load %struct.float4*, %struct.float4** %from.addr, align 8

  ; The destination is first viewed as a float4*, then both sides as raw bytes.
  %dst.f4 = bitcast float* %dst.f32 to %struct.float4*
  %dst.bytes = bitcast %struct.float4* %dst.f4 to i8*
  %src.bytes = bitcast %struct.float4* %src.f4 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst.bytes, i8* %src.bytes, i64 16, i32 4, i1 false)
  ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #6
; Function Attrs: inlinehint
; Eigen::internal::scalar_sqrt_op<float>::packetOp<float4>(const float4& a) const
;
; Forwards the packet reference %a to Eigen::internal::psqrt<float4> and
; returns its float4 result by value. The functor object itself carries no
; state that is read here.
define linkonce_odr %struct.float4 @_ZNK5Eigen8internal14scalar_sqrt_opIfE8packetOpI6float4EET_RKS5_(%"struct.Eigen::internal::scalar_sqrt_op"* %this, %struct.float4* dereferenceable(16) %a) #4 comdat align 2 {
; %1: return-value slot, %2/%3: argument slots.
%1 = alloca %struct.float4, align 4
%2 = alloca %"struct.Eigen::internal::scalar_sqrt_op"*, align 8
%3 = alloca %struct.float4*, align 8
store %"struct.Eigen::internal::scalar_sqrt_op"* %this, %"struct.Eigen::internal::scalar_sqrt_op"** %2, align 8
store %struct.float4* %a, %struct.float4** %3, align 8
; %4 (the reloaded this pointer) is not used by any later instruction.
%4 = load %"struct.Eigen::internal::scalar_sqrt_op"*, %"struct.Eigen::internal::scalar_sqrt_op"** %2, align 8
%5 = load %struct.float4*, %struct.float4** %3, align 8
%6 = call %struct.float4 @_ZN5Eigen8internal5psqrtI6float4EET_RKS3_(%struct.float4* dereferenceable(16) %5)
; Unpack the psqrt result field by field into the return slot.
%7 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%8 = extractvalue %struct.float4 %6, 0
store float %8, float* %7, align 4
%9 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%10 = extractvalue %struct.float4 %6, 1
store float %10, float* %9, align 4
%11 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%12 = extractvalue %struct.float4 %6, 2
store float %12, float* %11, align 4
%13 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%14 = extractvalue %struct.float4 %6, 3
store float %14, float* %13, align 4
%15 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %15
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;   GpuDevice>::packet<16>(int index) const
;
; Reads four consecutive floats starting at data[index] (data is struct
; field 0) with four separate scalar loads, combines them with make_float4
; and returns the float4 by value.
define linkonce_odr %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(%"struct.Eigen::TensorEvaluator.3"* %this, i32 %index) #4 comdat align 2 {
; %1: slot for the make_float4 result, %2: base pointer of the four elements,
; %3: return-value slot, %4/%5: argument slots.
%1 = alloca %struct.float4, align 4
%2 = alloca float*, align 8
%3 = alloca %struct.float4, align 4
%4 = alloca %"struct.Eigen::TensorEvaluator.3"*, align 8
%5 = alloca i32, align 4
store %"struct.Eigen::TensorEvaluator.3"* %this, %"struct.Eigen::TensorEvaluator.3"** %4, align 8
store i32 %index, i32* %5, align 4
%6 = load %"struct.Eigen::TensorEvaluator.3"*, %"struct.Eigen::TensorEvaluator.3"** %4, align 8
; base = this->data + (sign-extended) index
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.3", %"struct.Eigen::TensorEvaluator.3"* %6, i32 0, i32 0
%8 = load float*, float** %7, align 8
%9 = load i32, i32* %5, align 4
%10 = sext i32 %9 to i64
%11 = getelementptr inbounds float, float* %8, i64 %10
store float* %11, float** %2, align 8
; Scalar loads of base[0], base[1], base[2], base[3] (4-byte alignment each).
%12 = load float*, float** %2, align 8
%13 = load float, float* %12, align 4
%14 = load float*, float** %2, align 8
%15 = getelementptr inbounds float, float* %14, i64 1
%16 = load float, float* %15, align 4
%17 = load float*, float** %2, align 8
%18 = getelementptr inbounds float, float* %17, i64 2
%19 = load float, float* %18, align 4
%20 = load float*, float** %2, align 8
%21 = getelementptr inbounds float, float* %20, i64 3
%22 = load float, float* %21, align 4
%23 = call %struct.float4 @_Z11make_float4ffff(float %13, float %16, float %19, float %22)
; Unpack the make_float4 result into %1 ...
%24 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%25 = extractvalue %struct.float4 %23, 0
store float %25, float* %24, align 4
%26 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%27 = extractvalue %struct.float4 %23, 1
store float %27, float* %26, align 4
%28 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%29 = extractvalue %struct.float4 %23, 2
store float %29, float* %28, align 4
%30 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%31 = extractvalue %struct.float4 %23, 3
store float %31, float* %30, align 4
; ... then copy %1 into the return slot %3 (a second field-by-field copy).
%32 = load %struct.float4, %struct.float4* %1, align 4
%33 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 0
%34 = extractvalue %struct.float4 %32, 0
store float %34, float* %33, align 4
%35 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 1
%36 = extractvalue %struct.float4 %32, 1
store float %36, float* %35, align 4
%37 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 2
%38 = extractvalue %struct.float4 %32, 2
store float %38, float* %37, align 4
%39 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 3
%40 = extractvalue %struct.float4 %32, 3
store float %40, float* %39, align 4
%41 = load %struct.float4, %struct.float4* %3, align 4
ret %struct.float4 %41
}
; Function Attrs: inlinehint
; Eigen::internal::psqrt<float4>(const float4& a)
;
; Calls @sqrtf on each of the four float fields of the packet referenced by
; %a, combines the four results with make_float4 and returns the float4 by
; value.
define linkonce_odr %struct.float4 @_ZN5Eigen8internal5psqrtI6float4EET_RKS3_(%struct.float4* dereferenceable(16) %a) #4 comdat {
; %1: return-value slot, %2: argument slot.
%1 = alloca %struct.float4, align 4
%2 = alloca %struct.float4*, align 8
store %struct.float4* %a, %struct.float4** %2, align 8
; sqrtf of field 0
%3 = load %struct.float4*, %struct.float4** %2, align 8
%4 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 0
%5 = load float, float* %4, align 4
%6 = call float @sqrtf(float %5) #7
; sqrtf of field 1
%7 = load %struct.float4*, %struct.float4** %2, align 8
%8 = getelementptr inbounds %struct.float4, %struct.float4* %7, i32 0, i32 1
%9 = load float, float* %8, align 4
%10 = call float @sqrtf(float %9) #7
; sqrtf of field 2
%11 = load %struct.float4*, %struct.float4** %2, align 8
%12 = getelementptr inbounds %struct.float4, %struct.float4* %11, i32 0, i32 2
%13 = load float, float* %12, align 4
%14 = call float @sqrtf(float %13) #7
; sqrtf of field 3
%15 = load %struct.float4*, %struct.float4** %2, align 8
%16 = getelementptr inbounds %struct.float4, %struct.float4* %15, i32 0, i32 3
%17 = load float, float* %16, align 4
%18 = call float @sqrtf(float %17) #7
%19 = call %struct.float4 @_Z11make_float4ffff(float %6, float %10, float %14, float %18)
; Unpack the make_float4 result field by field into the return slot.
%20 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%21 = extractvalue %struct.float4 %19, 0
store float %21, float* %20, align 4
%22 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%23 = extractvalue %struct.float4 %19, 1
store float %23, float* %22, align 4
%24 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%25 = extractvalue %struct.float4 %19, 2
store float %25, float* %24, align 4
%26 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%27 = extractvalue %struct.float4 %19, 3
store float %27, float* %26, align 4
%28 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %28
}
declare %struct.float4 @_Z11make_float4ffff(float, float, float, float) #2
; Function Attrs: nounwind
declare float @sqrtf(float) #0
; Eigen::TensorEvaluator<const TensorCwiseUnaryOp<scalar_sqrt_op<float>,
;     const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>>,
;   GpuDevice>::coeff(int index) const
;
; Reads coefficient %index from the argument evaluator (struct field 1),
; parks it in a stack temporary and returns the result of applying the
; functor (struct field 0) to a reference to that temporary.
define linkonce_odr float @_ZNK5Eigen15TensorEvaluatorIKNS_18TensorCwiseUnaryOpINS_8internal14scalar_sqrt_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEEENS_9GpuDeviceEE5coeffEi(%"struct.Eigen::TensorEvaluator.2"* %this, i32 %index) #2 comdat align 2 {
  %this.addr = alloca %"struct.Eigen::TensorEvaluator.2"*, align 8
  %index.addr = alloca i32, align 4
  ; Temporary that the functor receives by reference.
  %arg.tmp = alloca float, align 4
  store %"struct.Eigen::TensorEvaluator.2"* %this, %"struct.Eigen::TensorEvaluator.2"** %this.addr, align 8
  store i32 %index, i32* %index.addr, align 4
  %self = load %"struct.Eigen::TensorEvaluator.2"*, %"struct.Eigen::TensorEvaluator.2"** %this.addr, align 8
  %functor = getelementptr inbounds %"struct.Eigen::TensorEvaluator.2", %"struct.Eigen::TensorEvaluator.2"* %self, i32 0, i32 0
  %arg.eval = getelementptr inbounds %"struct.Eigen::TensorEvaluator.2", %"struct.Eigen::TensorEvaluator.2"* %self, i32 0, i32 1

  ; arg.tmp = arg.coeff(index)
  %idx = load i32, i32* %index.addr, align 4
  %arg.value = call float @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(%"struct.Eigen::TensorEvaluator.3"* %arg.eval, i32 %idx)
  store float %arg.value, float* %arg.tmp, align 4

  ; return functor(arg.tmp)
  %result = call float @_ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf(%"struct.Eigen::internal::scalar_sqrt_op"* %functor, float* dereferenceable(4) %arg.tmp)
  ret float %result
}
; Function Attrs: inlinehint nounwind
; Eigen::TensorEvaluator<TensorMap<Tensor<float,1,1,int>,16,MakePointer>,
;   GpuDevice>::coeffRef(int index)
;
; Returns the address of element %index in the evaluator's data buffer
; (struct field 0, a float*). The index is sign-extended to 64 bits before
; the pointer arithmetic; no bounds check is performed here.
define linkonce_odr dereferenceable(4) float* @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(%"struct.Eigen::TensorEvaluator.0"* %this, i32 %index) #5 comdat align 2 {
  %this.addr = alloca %"struct.Eigen::TensorEvaluator.0"*, align 8
  %index.addr = alloca i32, align 4
  store %"struct.Eigen::TensorEvaluator.0"* %this, %"struct.Eigen::TensorEvaluator.0"** %this.addr, align 8
  store i32 %index, i32* %index.addr, align 4
  %self = load %"struct.Eigen::TensorEvaluator.0"*, %"struct.Eigen::TensorEvaluator.0"** %this.addr, align 8

  ; Base pointer of the buffer.
  %data.field = getelementptr inbounds %"struct.Eigen::TensorEvaluator.0", %"struct.Eigen::TensorEvaluator.0"* %self, i32 0, i32 0
  %data = load float*, float** %data.field, align 8

  ; Element offset.
  %idx = load i32, i32* %index.addr, align 4
  %idx.wide = sext i32 %idx to i64

  %elem.ptr = getelementptr inbounds float, float* %data, i64 %idx.wide
  ret float* %elem.ptr
}
; Function Attrs: inlinehint
; Eigen::internal::scalar_sqrt_op<float>::operator()(const float& a) const
;
; Returns the square root of the float referenced by %a.
;
; The root is computed with the single-precision @sqrtf, the same routine the
; float4 path (Eigen::internal::psqrt<float4>) calls per lane, so the scalar
; and packet paths of the kernel go through one implementation and this
; function contains no double-precision operations.
define linkonce_odr float @_ZNK5Eigen8internal14scalar_sqrt_opIfEclERKf(%"struct.Eigen::internal::scalar_sqrt_op"* %this, float* dereferenceable(4) %a) #4 comdat align 2 {
; %1: slot for the pointer passed on to the sqrt step, %2/%3: argument slots.
%1 = alloca float*, align 8
%2 = alloca %"struct.Eigen::internal::scalar_sqrt_op"*, align 8
%3 = alloca float*, align 8
store %"struct.Eigen::internal::scalar_sqrt_op"* %this, %"struct.Eigen::internal::scalar_sqrt_op"** %2, align 8
store float* %a, float** %3, align 8
; %4 (the reloaded this pointer) is not used by any later instruction.
%4 = load %"struct.Eigen::internal::scalar_sqrt_op"*, %"struct.Eigen::internal::scalar_sqrt_op"** %2, align 8
%5 = load float*, float** %3, align 8
store float* %5, float** %1, align 8
%6 = load float*, float** %1, align 8
; Load the operand and take its square root directly in float.
%7 = load float, float* %6, align 4
%8 = call float @sqrtf(float %7) #7
ret float %8
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;   GpuDevice>::coeff(int index) const
;
; Returns data[index], where data is the float* held in struct field 0. The
; index is sign-extended to 64 bits for the address computation.
define linkonce_odr float @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(%"struct.Eigen::TensorEvaluator.3"* %this, i32 %index) #4 comdat align 2 {
  ; Slot that holds the computed element address before it is dereferenced.
  %elem.slot = alloca float*, align 8
  %this.addr = alloca %"struct.Eigen::TensorEvaluator.3"*, align 8
  %index.addr = alloca i32, align 4
  store %"struct.Eigen::TensorEvaluator.3"* %this, %"struct.Eigen::TensorEvaluator.3"** %this.addr, align 8
  store i32 %index, i32* %index.addr, align 4
  %self = load %"struct.Eigen::TensorEvaluator.3"*, %"struct.Eigen::TensorEvaluator.3"** %this.addr, align 8

  ; elem.ptr = this->data + index
  %data.field = getelementptr inbounds %"struct.Eigen::TensorEvaluator.3", %"struct.Eigen::TensorEvaluator.3"* %self, i32 0, i32 0
  %data = load float*, float** %data.field, align 8
  %idx = load i32, i32* %index.addr, align 4
  %idx.wide = sext i32 %idx to i64
  %elem.ptr = getelementptr inbounds float, float* %data, i64 %idx.wide
  store float* %elem.ptr, float** %elem.slot, align 8

  ; Read the element back through the slot.
  %elem = load float*, float** %elem.slot, align 8
  %value = load float, float* %elem, align 4
  ret float %value
}
; Function Attrs: nounwind
declare double @sqrt(double) #0
; Eigen::internal::EigenMetaKernel<
;     TensorEvaluator<const TensorAssignOp<
;         TensorMap<Tensor<float,1,1,int>,16,MakePointer>,
;         const TensorCwiseBinaryOp<scalar_sqrt_gradient_op<float>,
;             const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;             const TensorMap<Tensor<const float,1,1,long>,16,MakePointer>>>,
;       GpuDevice>,
;     long>(eval, size)
;
; Kernel entry point for the sqrt-gradient assignment. Each thread derives
;   first_index = ctaid.x * ntid.x + tid.x
;   step_size   = ntid.x * nctaid.x
; from the PTX special-register intrinsics and then runs two loops over the
; by-value evaluator %eval:
;   1. packets:  i = first_index*4; i < (size/4)*4; i += step_size*4 -> evalPacket(i)
;   2. scalars:  i = (size/4)*4 + first_index; i < size; i += step_size -> evalScalar(i)
; The `.i` suffixed locals and the long exit label name suggest the body is an
; inlined EigenMetaKernelEval<..., long, true>::run -- NOTE(review): inferred
; from the names only.
define weak_odr void @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElEEvT_T0_(%"struct.Eigen::TensorEvaluator.7"* byval align 8 %eval, i64 %size) #2 comdat {
; %1..%4: evaluator pointer, first index, size and step size as seen by the loops.
%1 = alloca %"struct.Eigen::TensorEvaluator.7"*, align 8
%2 = alloca i64, align 8
%3 = alloca i64, align 8
%4 = alloca i64, align 8
%PacketSize.i = alloca i64, align 8
%vectorized_size.i = alloca i64, align 8
%vectorized_step_size.i = alloca i64, align 8
%i.i = alloca i64, align 8
%i1.i = alloca i64, align 8
%5 = alloca i64, align 8
%first_index = alloca i64, align 8
%step_size = alloca i64, align 8
%vectorizable = alloca i8, align 1
store i64 %size, i64* %5, align 8
; first_index = ctaid.x * ntid.x + tid.x, computed in 32 bits and zero-extended.
%6 = call i32 @llvm.ptx.read.ctaid.x() #7
%7 = call i32 @llvm.ptx.read.ntid.x() #7
%8 = mul i32 %6, %7
%9 = call i32 @llvm.ptx.read.tid.x() #7
%10 = add i32 %8, %9
%11 = zext i32 %10 to i64
store i64 %11, i64* %first_index, align 8
; step_size = ntid.x * nctaid.x, computed in 32 bits and zero-extended.
%12 = call i32 @llvm.ptx.read.ntid.x() #7
%13 = call i32 @llvm.ptx.read.nctaid.x() #7
%14 = mul i32 %12, %13
%15 = zext i32 %14 to i64
store i64 %15, i64* %step_size, align 8
; %vectorizable is set to 1 here and not read again in this function.
store i8 1, i8* %vectorizable, align 1
%16 = load i64, i64* %first_index, align 8
%17 = load i64, i64* %5, align 8
%18 = load i64, i64* %step_size, align 8
store %"struct.Eigen::TensorEvaluator.7"* %eval, %"struct.Eigen::TensorEvaluator.7"** %1, align 8
store i64 %16, i64* %2, align 8
store i64 %17, i64* %3, align 8
store i64 %18, i64* %4, align 8
; PacketSize = 4 (stored, not read again in this function).
store i64 4, i64* %PacketSize.i, align 8
; vectorized_size = (size / 4) * 4
%19 = load i64, i64* %3, align 8
%20 = sdiv i64 %19, 4
%21 = mul nsw i64 %20, 4
store i64 %21, i64* %vectorized_size.i, align 8
; vectorized_step_size = step_size * 4
%22 = load i64, i64* %4, align 8
%23 = mul nsw i64 %22, 4
store i64 %23, i64* %vectorized_step_size.i, align 8
; Packet loop start: i = first_index * 4
%24 = load i64, i64* %2, align 8
%25 = mul nsw i64 %24, 4
store i64 %25, i64* %i.i, align 8
br label %26
; Packet loop condition: continue while i < vectorized_size.
; <label>:26 ; preds = %30, %0
%27 = load i64, i64* %i.i, align 8
%28 = load i64, i64* %vectorized_size.i, align 8
%29 = icmp slt i64 %27, %28
br i1 %29, label %30, label %36
; Packet loop body: evalPacket(i), then i += vectorized_step_size.
; <label>:30 ; preds = %26
%31 = load %"struct.Eigen::TensorEvaluator.7"*, %"struct.Eigen::TensorEvaluator.7"** %1, align 8
%32 = load i64, i64* %i.i, align 8
call void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEl(%"struct.Eigen::TensorEvaluator.7"* %31, i64 %32)
%33 = load i64, i64* %vectorized_step_size.i, align 8
%34 = load i64, i64* %i.i, align 8
%35 = add nsw i64 %34, %33
store i64 %35, i64* %i.i, align 8
br label %26
; Scalar loop start: i1 = vectorized_size + first_index.
; <label>:36 ; preds = %26
%37 = load i64, i64* %vectorized_size.i, align 8
%38 = load i64, i64* %2, align 8
%39 = add nsw i64 %37, %38
store i64 %39, i64* %i1.i, align 8
br label %40
; Scalar loop condition: continue while i1 < size.
; <label>:40 ; preds = %44, %36
%41 = load i64, i64* %i1.i, align 8
%42 = load i64, i64* %3, align 8
%43 = icmp slt i64 %41, %42
br i1 %43, label %44, label %_ZN5Eigen8internal19EigenMetaKernelEvalINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElLb1EE3runERSO_lll.exit
; Scalar loop body: evalScalar(i1), then i1 += step_size.
; <label>:44 ; preds = %40
%45 = load %"struct.Eigen::TensorEvaluator.7"*, %"struct.Eigen::TensorEvaluator.7"** %1, align 8
%46 = load i64, i64* %i1.i, align 8
call void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEl(%"struct.Eigen::TensorEvaluator.7"* %45, i64 %46)
%47 = load i64, i64* %4, align 8
%48 = load i64, i64* %i1.i, align 8
%49 = add nsw i64 %48, %47
store i64 %49, i64* %i1.i, align 8
br label %40
; Common exit once the scalar loop is finished.
_ZN5Eigen8internal19EigenMetaKernelEvalINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElLb1EE3runERSO_lll.exit: ; preds = %40
ret void
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorAssignOp<
;     TensorMap<Tensor<float,1,1,int>,16,MakePointer>,
;     const TensorCwiseBinaryOp<scalar_sqrt_gradient_op<float>,
;         const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;         const TensorMap<Tensor<const float,1,1,long>,16,MakePointer>>>,
;   GpuDevice>::evalPacket(long i)
;
; Evaluates one float4 of the assignment: obtains the packet at %i from the
; right-hand evaluator (struct field 1) and writes it through the left-hand
; evaluator's writePacket (struct field 0).
define linkonce_odr void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalPacketEl(%"struct.Eigen::TensorEvaluator.7"* %this, i64 %i) #4 comdat align 2 {
%1 = alloca %"struct.Eigen::TensorEvaluator.7"*, align 8
%2 = alloca i64, align 8
%LhsStoreMode = alloca i32, align 4
%RhsLoadMode = alloca i32, align 4
; %3: temporary holding the right-hand packet passed by reference to writePacket.
%3 = alloca %struct.float4, align 4
store %"struct.Eigen::TensorEvaluator.7"* %this, %"struct.Eigen::TensorEvaluator.7"** %1, align 8
store i64 %i, i64* %2, align 8
%4 = load %"struct.Eigen::TensorEvaluator.7"*, %"struct.Eigen::TensorEvaluator.7"** %1, align 8
; Both mode locals are set to 16 and not read again in this function.
store i32 16, i32* %LhsStoreMode, align 4
store i32 16, i32* %RhsLoadMode, align 4
%5 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.7", %"struct.Eigen::TensorEvaluator.7"* %4, i32 0, i32 0
%6 = load i64, i64* %2, align 8
; NOTE(review): the 64-bit index is truncated to 32 bits for the int-indexed
; left-hand evaluator; indices that do not fit in i32 would be silently wrapped.
%7 = trunc i64 %6 to i32
%8 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.7", %"struct.Eigen::TensorEvaluator.7"* %4, i32 0, i32 1
%9 = load i64, i64* %2, align 8
; The right-hand evaluator receives the full 64-bit index.
%10 = call %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE6packetILi16EEE6float4l(%"struct.Eigen::TensorEvaluator.8"* %8, i64 %9)
; Unpack the returned aggregate field by field into the temporary %3.
%11 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 0
%12 = extractvalue %struct.float4 %10, 0
store float %12, float* %11, align 4
%13 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 1
%14 = extractvalue %struct.float4 %10, 1
store float %14, float* %13, align 4
%15 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 2
%16 = extractvalue %struct.float4 %10, 2
store float %16, float* %15, align 4
%17 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 3
%18 = extractvalue %struct.float4 %10, 3
store float %18, float* %17, align 4
call void @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE11writePacketILi16EEEviRK6float4(%"struct.Eigen::TensorEvaluator.0"* %5, i32 %7, %struct.float4* dereferenceable(16) %3)
ret void
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorAssignOp<
;     TensorMap<Tensor<float,1,1,int>,16,MakePointer>,
;     const TensorCwiseBinaryOp<scalar_sqrt_gradient_op<float>,
;         const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;         const TensorMap<Tensor<const float,1,1,long>,16,MakePointer>>>,
;   GpuDevice>::evalScalar(long i)
;
; Evaluates a single element of the assignment: asks the right-hand evaluator
; (struct field 1) for coefficient %i using the full 64-bit index, then stores
; the float through the reference returned by the left-hand evaluator's
; coeffRef (struct field 0), which takes the index truncated to 32 bits.
define linkonce_odr void @_ZN5Eigen15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS2_INS3_IKfLi1ELi1EiEELi16ES5_EEKNS2_INS3_ISB_Li1ELi1ElEELi16ES5_EEEEEENS_9GpuDeviceEE10evalScalarEl(%"struct.Eigen::TensorEvaluator.7"* %this, i64 %i) #4 comdat align 2 {
  %this.addr = alloca %"struct.Eigen::TensorEvaluator.7"*, align 8
  %i.addr = alloca i64, align 8
  store %"struct.Eigen::TensorEvaluator.7"* %this, %"struct.Eigen::TensorEvaluator.7"** %this.addr, align 8
  store i64 %i, i64* %i.addr, align 8
  %self = load %"struct.Eigen::TensorEvaluator.7"*, %"struct.Eigen::TensorEvaluator.7"** %this.addr, align 8

  ; Right-hand side: value = rhs.coeff(i)
  %rhs.eval = getelementptr inbounds %"struct.Eigen::TensorEvaluator.7", %"struct.Eigen::TensorEvaluator.7"* %self, i32 0, i32 1
  %rhs.index = load i64, i64* %i.addr, align 8
  %value = call float @_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE5coeffEl(%"struct.Eigen::TensorEvaluator.8"* %rhs.eval, i64 %rhs.index)

  ; Left-hand side: lhs.coeffRef((int)i) = value
  %lhs.eval = getelementptr inbounds %"struct.Eigen::TensorEvaluator.7", %"struct.Eigen::TensorEvaluator.7"* %self, i32 0, i32 0
  %lhs.index.wide = load i64, i64* %i.addr, align 8
  %lhs.index = trunc i64 %lhs.index.wide to i32
  %slot = call dereferenceable(4) float* @_ZN5Eigen15TensorEvaluatorINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE8coeffRefEi(%"struct.Eigen::TensorEvaluator.0"* %lhs.eval, i32 %lhs.index)
  store float %value, float* %slot, align 4
  ret void
}
; Function Attrs: inlinehint
; Eigen::TensorEvaluator<const TensorCwiseBinaryOp<scalar_sqrt_gradient_op<float>,
;     const TensorMap<Tensor<const float,1,1,int>,16,MakePointer>,
;     const TensorMap<Tensor<const float,1,1,long>,16,MakePointer>>,
;   GpuDevice>::packet<16>(long index) const
;
; Fetches a float4 at %index from each of the two argument evaluators
; (struct fields 1 and 2), passes both to the functor's packetOp (struct
; field 0) and returns the resulting float4 by value.
define linkonce_odr %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE6packetILi16EEE6float4l(%"struct.Eigen::TensorEvaluator.8"* %this, i64 %index) #4 comdat align 2 {
; %1: return-value slot, %2/%3: argument slots,
; %4: packet from the first argument, %5: packet from the second argument.
%1 = alloca %struct.float4, align 4
%2 = alloca %"struct.Eigen::TensorEvaluator.8"*, align 8
%3 = alloca i64, align 8
%4 = alloca %struct.float4, align 4
%5 = alloca %struct.float4, align 4
store %"struct.Eigen::TensorEvaluator.8"* %this, %"struct.Eigen::TensorEvaluator.8"** %2, align 8
store i64 %index, i64* %3, align 8
%6 = load %"struct.Eigen::TensorEvaluator.8"*, %"struct.Eigen::TensorEvaluator.8"** %2, align 8
; %7 = &this->functor (field 0), %8 = &this->first argument evaluator (field 1).
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %6, i32 0, i32 0
%8 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %6, i32 0, i32 1
%9 = load i64, i64* %3, align 8
; The first argument evaluator is int-indexed, so the index is truncated to i32.
%10 = trunc i64 %9 to i32
%11 = call %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4i(%"struct.Eigen::TensorEvaluator.3"* %8, i32 %10)
; Unpack the first packet into %4.
%12 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 0
%13 = extractvalue %struct.float4 %11, 0
store float %13, float* %12, align 4
%14 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 1
%15 = extractvalue %struct.float4 %11, 1
store float %15, float* %14, align 4
%16 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 2
%17 = extractvalue %struct.float4 %11, 2
store float %17, float* %16, align 4
%18 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 3
%19 = extractvalue %struct.float4 %11, 3
store float %19, float* %18, align 4
; Second argument evaluator (field 2) is long-indexed and gets the full i64 index.
%20 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %6, i32 0, i32 2
%21 = load i64, i64* %3, align 8
%22 = call %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4l(%"struct.Eigen::TensorEvaluator.9"* %20, i64 %21)
; Unpack the second packet into %5.
%23 = getelementptr inbounds %struct.float4, %struct.float4* %5, i32 0, i32 0
%24 = extractvalue %struct.float4 %22, 0
store float %24, float* %23, align 4
%25 = getelementptr inbounds %struct.float4, %struct.float4* %5, i32 0, i32 1
%26 = extractvalue %struct.float4 %22, 1
store float %26, float* %25, align 4
%27 = getelementptr inbounds %struct.float4, %struct.float4* %5, i32 0, i32 2
%28 = extractvalue %struct.float4 %22, 2
store float %28, float* %27, align 4
%29 = getelementptr inbounds %struct.float4, %struct.float4* %5, i32 0, i32 3
%30 = extractvalue %struct.float4 %22, 3
store float %30, float* %29, align 4
; result = functor.packetOp(first packet, second packet)
%31 = call %struct.float4 @_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfE8packetOpI6float4EEKT_RS6_S7_(%"struct.Eigen::internal::scalar_sqrt_gradient_op"* %7, %struct.float4* dereferenceable(16) %4, %struct.float4* dereferenceable(16) %5)
; Unpack the functor result into the return slot %1.
%32 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%33 = extractvalue %struct.float4 %31, 0
store float %33, float* %32, align 4
%34 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%35 = extractvalue %struct.float4 %31, 1
store float %35, float* %34, align 4
%36 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%37 = extractvalue %struct.float4 %31, 2
store float %37, float* %36, align 4
%38 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%39 = extractvalue %struct.float4 %31, 3
store float %39, float* %38, align 4
%40 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %40
}
; Function Attrs: inlinehint
; Eigen::internal::scalar_sqrt_gradient_op<float>::packetOp<float4>(
;     const float4& output, const float4& output_gradient) const
;
; Builds the result from four packet helper calls, in this order:
;   const_half = pset1<float4>(0.5f)
;   out_conj   = pconj(output)
;   tmp        = pmul(const_half, output_gradient)
;   result     = pdiv(tmp, out_conj)
; The helpers are defined outside this function; going by their names this is
; presumably 0.5 * output_gradient / conj(output) per lane -- confirm against
; their definitions.
define linkonce_odr %struct.float4 @_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfE8packetOpI6float4EEKT_RS6_S7_(%"struct.Eigen::internal::scalar_sqrt_gradient_op"* %this, %struct.float4* dereferenceable(16) %output, %struct.float4* dereferenceable(16) %output_gradient) #4 comdat align 2 {
; %1: return-value slot, %2..%4: argument slots, %5: scalar 0.5 passed by
; reference to pset1, %6: temporary for the pmul result.
%1 = alloca %struct.float4, align 4
%2 = alloca %"struct.Eigen::internal::scalar_sqrt_gradient_op"*, align 8
%3 = alloca %struct.float4*, align 8
%4 = alloca %struct.float4*, align 8
%const_half = alloca %struct.float4, align 4
%5 = alloca float, align 4
%out_conj = alloca %struct.float4, align 4
%6 = alloca %struct.float4, align 4
store %"struct.Eigen::internal::scalar_sqrt_gradient_op"* %this, %"struct.Eigen::internal::scalar_sqrt_gradient_op"** %2, align 8
store %struct.float4* %output, %struct.float4** %3, align 8
store %struct.float4* %output_gradient, %struct.float4** %4, align 8
; %7 (the reloaded this pointer) is not used by any later instruction.
%7 = load %"struct.Eigen::internal::scalar_sqrt_gradient_op"*, %"struct.Eigen::internal::scalar_sqrt_gradient_op"** %2, align 8
; const_half = pset1(0.5f)
store float 5.000000e-01, float* %5, align 4
%8 = call %struct.float4 @_ZN5Eigen8internal5pset1I6float4EET_RKNS0_15unpacket_traitsIS3_E4typeE(float* dereferenceable(4) %5)
%9 = getelementptr inbounds %struct.float4, %struct.float4* %const_half, i32 0, i32 0
%10 = extractvalue %struct.float4 %8, 0
store float %10, float* %9, align 4
%11 = getelementptr inbounds %struct.float4, %struct.float4* %const_half, i32 0, i32 1
%12 = extractvalue %struct.float4 %8, 1
store float %12, float* %11, align 4
%13 = getelementptr inbounds %struct.float4, %struct.float4* %const_half, i32 0, i32 2
%14 = extractvalue %struct.float4 %8, 2
store float %14, float* %13, align 4
%15 = getelementptr inbounds %struct.float4, %struct.float4* %const_half, i32 0, i32 3
%16 = extractvalue %struct.float4 %8, 3
store float %16, float* %15, align 4
; out_conj = pconj(output)
%17 = load %struct.float4*, %struct.float4** %3, align 8
%18 = call %struct.float4 @_ZN5Eigen8internal5pconjI6float4EET_RKS3_(%struct.float4* dereferenceable(16) %17)
%19 = getelementptr inbounds %struct.float4, %struct.float4* %out_conj, i32 0, i32 0
%20 = extractvalue %struct.float4 %18, 0
store float %20, float* %19, align 4
%21 = getelementptr inbounds %struct.float4, %struct.float4* %out_conj, i32 0, i32 1
%22 = extractvalue %struct.float4 %18, 1
store float %22, float* %21, align 4
%23 = getelementptr inbounds %struct.float4, %struct.float4* %out_conj, i32 0, i32 2
%24 = extractvalue %struct.float4 %18, 2
store float %24, float* %23, align 4
%25 = getelementptr inbounds %struct.float4, %struct.float4* %out_conj, i32 0, i32 3
%26 = extractvalue %struct.float4 %18, 3
store float %26, float* %25, align 4
; %6 = pmul(const_half, output_gradient)
%27 = load %struct.float4*, %struct.float4** %4, align 8
%28 = call %struct.float4 @_ZN5Eigen8internal4pmulI6float4EET_RKS3_S5_(%struct.float4* dereferenceable(16) %const_half, %struct.float4* dereferenceable(16) %27)
%29 = getelementptr inbounds %struct.float4, %struct.float4* %6, i32 0, i32 0
%30 = extractvalue %struct.float4 %28, 0
store float %30, float* %29, align 4
%31 = getelementptr inbounds %struct.float4, %struct.float4* %6, i32 0, i32 1
%32 = extractvalue %struct.float4 %28, 1
store float %32, float* %31, align 4
%33 = getelementptr inbounds %struct.float4, %struct.float4* %6, i32 0, i32 2
%34 = extractvalue %struct.float4 %28, 2
store float %34, float* %33, align 4
%35 = getelementptr inbounds %struct.float4, %struct.float4* %6, i32 0, i32 3
%36 = extractvalue %struct.float4 %28, 3
store float %36, float* %35, align 4
; result = pdiv(%6, out_conj), unpacked into the return slot %1.
%37 = call %struct.float4 @_ZN5Eigen8internal4pdivI6float4EET_RKS3_S5_(%struct.float4* dereferenceable(16) %6, %struct.float4* dereferenceable(16) %out_conj)
%38 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%39 = extractvalue %struct.float4 %37, 0
store float %39, float* %38, align 4
%40 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%41 = extractvalue %struct.float4 %37, 1
store float %41, float* %40, align 4
%42 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%43 = extractvalue %struct.float4 %37, 2
store float %43, float* %42, align 4
%44 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%45 = extractvalue %struct.float4 %37, 3
store float %45, float* %44, align 4
%46 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %46
}
; Demangled: Eigen::TensorEvaluator<const Eigen::TensorMap<Eigen::Tensor<const float, 1, 1, long>, 16,
;            Eigen::MakePointer>, Eigen::GpuDevice>::packet<16>(long) const -> float4
;
; Reads four consecutive floats starting at element %index of the float*
; stored in field 0 of %this, packs them with make_float4 and returns the
; resulting %struct.float4 by value.
;   %this  - evaluator object; only its field 0 (a float*) is read here.
;   %index - element offset of the first float to read.
; Function Attrs: inlinehint
define linkonce_odr %struct.float4 @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE6packetILi16EEE6float4l(%"struct.Eigen::TensorEvaluator.9"* %this, i64 %index) #4 comdat align 2 {
; Stack slots: %1 and %3 hold float4 temporaries, %2 the address of the first
; element, %4/%5 the spilled arguments.
%1 = alloca %struct.float4, align 4
%2 = alloca float*, align 8
%3 = alloca %struct.float4, align 4
%4 = alloca %"struct.Eigen::TensorEvaluator.9"*, align 8
%5 = alloca i64, align 8
store %"struct.Eigen::TensorEvaluator.9"* %this, %"struct.Eigen::TensorEvaluator.9"** %4, align 8
store i64 %index, i64* %5, align 8
; Compute the address of element %index: field0(this) + index.
%6 = load %"struct.Eigen::TensorEvaluator.9"*, %"struct.Eigen::TensorEvaluator.9"** %4, align 8
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.9", %"struct.Eigen::TensorEvaluator.9"* %6, i32 0, i32 0
%8 = load float*, float** %7, align 8
%9 = load i64, i64* %5, align 8
%10 = getelementptr inbounds float, float* %8, i64 %9
store float* %10, float** %2, align 8
; Four scalar loads (each align 4) at offsets 0, 1, 2 and 3 from that address.
%11 = load float*, float** %2, align 8
%12 = load float, float* %11, align 4
%13 = load float*, float** %2, align 8
%14 = getelementptr inbounds float, float* %13, i64 1
%15 = load float, float* %14, align 4
%16 = load float*, float** %2, align 8
%17 = getelementptr inbounds float, float* %16, i64 2
%18 = load float, float* %17, align 4
%19 = load float*, float** %2, align 8
%20 = getelementptr inbounds float, float* %19, i64 3
%21 = load float, float* %20, align 4
; Pack the four values and spill the aggregate result field by field into %1.
%22 = call %struct.float4 @_Z11make_float4ffff(float %12, float %15, float %18, float %21)
%23 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%24 = extractvalue %struct.float4 %22, 0
store float %24, float* %23, align 4
%25 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%26 = extractvalue %struct.float4 %22, 1
store float %26, float* %25, align 4
%27 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%28 = extractvalue %struct.float4 %22, 2
store float %28, float* %27, align 4
%29 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%30 = extractvalue %struct.float4 %22, 3
store float %30, float* %29, align 4
; Copy the temporary in %1 into the second slot %3, again field by field.
%31 = load %struct.float4, %struct.float4* %1, align 4
%32 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 0
%33 = extractvalue %struct.float4 %31, 0
store float %33, float* %32, align 4
%34 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 1
%35 = extractvalue %struct.float4 %31, 1
store float %35, float* %34, align 4
%36 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 2
%37 = extractvalue %struct.float4 %31, 2
store float %37, float* %36, align 4
%38 = getelementptr inbounds %struct.float4, %struct.float4* %3, i32 0, i32 3
%39 = extractvalue %struct.float4 %31, 3
store float %39, float* %38, align 4
; Return the copy held in %3.
%40 = load %struct.float4, %struct.float4* %3, align 4
ret %struct.float4 %40
}
; Demangled: float4 Eigen::internal::pset1<float4>(const Eigen::internal::unpacket_traits<float4>::type&)
;
; Broadcasts one scalar: the float referenced by %from is loaded four times
; and passed as all four arguments of make_float4; the packed result is
; returned by value.
;   %from - pointer to the scalar to replicate (dereferenceable for 4 bytes).
; Function Attrs: inlinehint
define linkonce_odr %struct.float4 @_ZN5Eigen8internal5pset1I6float4EET_RKNS0_15unpacket_traitsIS3_E4typeE(float* dereferenceable(4) %from) #4 comdat {
%1 = alloca %struct.float4, align 4
%2 = alloca float*, align 8
store float* %from, float** %2, align 8
; The same address is reloaded and dereferenced once per lane.
%3 = load float*, float** %2, align 8
%4 = load float, float* %3, align 4
%5 = load float*, float** %2, align 8
%6 = load float, float* %5, align 4
%7 = load float*, float** %2, align 8
%8 = load float, float* %7, align 4
%9 = load float*, float** %2, align 8
%10 = load float, float* %9, align 4
%11 = call %struct.float4 @_Z11make_float4ffff(float %4, float %6, float %8, float %10)
; Spill the aggregate result into the return slot %1 field by field.
%12 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%13 = extractvalue %struct.float4 %11, 0
store float %13, float* %12, align 4
%14 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%15 = extractvalue %struct.float4 %11, 1
store float %15, float* %14, align 4
%16 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%17 = extractvalue %struct.float4 %11, 2
store float %17, float* %16, align 4
%18 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%19 = extractvalue %struct.float4 %11, 3
store float %19, float* %18, align 4
%20 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %20
}
; Demangled: float4 Eigen::internal::pconj<float4>(const float4&)
;
; Returns an unmodified copy of the float4 referenced by %a: the 16 bytes at
; %a are memcpy'd into the local return slot and that slot is returned.
;   %a - source packet (dereferenceable for 16 bytes).
; Function Attrs: inlinehint nounwind
define linkonce_odr %struct.float4 @_ZN5Eigen8internal5pconjI6float4EET_RKS3_(%struct.float4* dereferenceable(16) %a) #5 comdat {
%1 = alloca %struct.float4, align 4
%2 = alloca %struct.float4*, align 8
store %struct.float4* %a, %struct.float4** %2, align 8
%3 = load %struct.float4*, %struct.float4** %2, align 8
; Byte-wise copy *%a -> %1 (16 bytes, alignment 4, non-volatile).
%4 = bitcast %struct.float4* %1 to i8*
%5 = bitcast %struct.float4* %3 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 16, i32 4, i1 false)
%6 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %6
}
; Demangled: float4 Eigen::internal::pdiv<float4>(const float4&, const float4&)
;
; Component-wise division: for each of the four fields i, computes
; a.field[i] / b.field[i] with a scalar fdiv, packs the quotients with
; make_float4 and returns the result by value.
;   %a - dividend packet (dereferenceable for 16 bytes).
;   %b - divisor packet  (dereferenceable for 16 bytes).
; No check is made on the divisor values.
; Function Attrs: inlinehint
define linkonce_odr %struct.float4 @_ZN5Eigen8internal4pdivI6float4EET_RKS3_S5_(%struct.float4* dereferenceable(16) %a, %struct.float4* dereferenceable(16) %b) #4 comdat {
%1 = alloca %struct.float4, align 4
%2 = alloca %struct.float4*, align 8
%3 = alloca %struct.float4*, align 8
store %struct.float4* %a, %struct.float4** %2, align 8
store %struct.float4* %b, %struct.float4** %3, align 8
; Field 0: a[0] / b[0]
%4 = load %struct.float4*, %struct.float4** %2, align 8
%5 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 0
%6 = load float, float* %5, align 4
%7 = load %struct.float4*, %struct.float4** %3, align 8
%8 = getelementptr inbounds %struct.float4, %struct.float4* %7, i32 0, i32 0
%9 = load float, float* %8, align 4
%10 = fdiv float %6, %9
; Field 1: a[1] / b[1]
%11 = load %struct.float4*, %struct.float4** %2, align 8
%12 = getelementptr inbounds %struct.float4, %struct.float4* %11, i32 0, i32 1
%13 = load float, float* %12, align 4
%14 = load %struct.float4*, %struct.float4** %3, align 8
%15 = getelementptr inbounds %struct.float4, %struct.float4* %14, i32 0, i32 1
%16 = load float, float* %15, align 4
%17 = fdiv float %13, %16
; Field 2: a[2] / b[2]
%18 = load %struct.float4*, %struct.float4** %2, align 8
%19 = getelementptr inbounds %struct.float4, %struct.float4* %18, i32 0, i32 2
%20 = load float, float* %19, align 4
%21 = load %struct.float4*, %struct.float4** %3, align 8
%22 = getelementptr inbounds %struct.float4, %struct.float4* %21, i32 0, i32 2
%23 = load float, float* %22, align 4
%24 = fdiv float %20, %23
; Field 3: a[3] / b[3]
%25 = load %struct.float4*, %struct.float4** %2, align 8
%26 = getelementptr inbounds %struct.float4, %struct.float4* %25, i32 0, i32 3
%27 = load float, float* %26, align 4
%28 = load %struct.float4*, %struct.float4** %3, align 8
%29 = getelementptr inbounds %struct.float4, %struct.float4* %28, i32 0, i32 3
%30 = load float, float* %29, align 4
%31 = fdiv float %27, %30
; Pack the four quotients and spill them into the return slot %1.
%32 = call %struct.float4 @_Z11make_float4ffff(float %10, float %17, float %24, float %31)
%33 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%34 = extractvalue %struct.float4 %32, 0
store float %34, float* %33, align 4
%35 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%36 = extractvalue %struct.float4 %32, 1
store float %36, float* %35, align 4
%37 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%38 = extractvalue %struct.float4 %32, 2
store float %38, float* %37, align 4
%39 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%40 = extractvalue %struct.float4 %32, 3
store float %40, float* %39, align 4
%41 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %41
}
; Demangled: float4 Eigen::internal::pmul<float4>(const float4&, const float4&)
;
; Component-wise multiplication: for each of the four fields i, computes
; a.field[i] * b.field[i] with a scalar fmul, packs the products with
; make_float4 and returns the result by value.
;   %a - first factor  (dereferenceable for 16 bytes).
;   %b - second factor (dereferenceable for 16 bytes).
; Function Attrs: inlinehint
define linkonce_odr %struct.float4 @_ZN5Eigen8internal4pmulI6float4EET_RKS3_S5_(%struct.float4* dereferenceable(16) %a, %struct.float4* dereferenceable(16) %b) #4 comdat {
%1 = alloca %struct.float4, align 4
%2 = alloca %struct.float4*, align 8
%3 = alloca %struct.float4*, align 8
store %struct.float4* %a, %struct.float4** %2, align 8
store %struct.float4* %b, %struct.float4** %3, align 8
; Field 0: a[0] * b[0]
%4 = load %struct.float4*, %struct.float4** %2, align 8
%5 = getelementptr inbounds %struct.float4, %struct.float4* %4, i32 0, i32 0
%6 = load float, float* %5, align 4
%7 = load %struct.float4*, %struct.float4** %3, align 8
%8 = getelementptr inbounds %struct.float4, %struct.float4* %7, i32 0, i32 0
%9 = load float, float* %8, align 4
%10 = fmul float %6, %9
; Field 1: a[1] * b[1]
%11 = load %struct.float4*, %struct.float4** %2, align 8
%12 = getelementptr inbounds %struct.float4, %struct.float4* %11, i32 0, i32 1
%13 = load float, float* %12, align 4
%14 = load %struct.float4*, %struct.float4** %3, align 8
%15 = getelementptr inbounds %struct.float4, %struct.float4* %14, i32 0, i32 1
%16 = load float, float* %15, align 4
%17 = fmul float %13, %16
; Field 2: a[2] * b[2]
%18 = load %struct.float4*, %struct.float4** %2, align 8
%19 = getelementptr inbounds %struct.float4, %struct.float4* %18, i32 0, i32 2
%20 = load float, float* %19, align 4
%21 = load %struct.float4*, %struct.float4** %3, align 8
%22 = getelementptr inbounds %struct.float4, %struct.float4* %21, i32 0, i32 2
%23 = load float, float* %22, align 4
%24 = fmul float %20, %23
; Field 3: a[3] * b[3]
%25 = load %struct.float4*, %struct.float4** %2, align 8
%26 = getelementptr inbounds %struct.float4, %struct.float4* %25, i32 0, i32 3
%27 = load float, float* %26, align 4
%28 = load %struct.float4*, %struct.float4** %3, align 8
%29 = getelementptr inbounds %struct.float4, %struct.float4* %28, i32 0, i32 3
%30 = load float, float* %29, align 4
%31 = fmul float %27, %30
; Pack the four products and spill them into the return slot %1.
%32 = call %struct.float4 @_Z11make_float4ffff(float %10, float %17, float %24, float %31)
%33 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 0
%34 = extractvalue %struct.float4 %32, 0
store float %34, float* %33, align 4
%35 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 1
%36 = extractvalue %struct.float4 %32, 1
store float %36, float* %35, align 4
%37 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 2
%38 = extractvalue %struct.float4 %32, 2
store float %38, float* %37, align 4
%39 = getelementptr inbounds %struct.float4, %struct.float4* %1, i32 0, i32 3
%40 = extractvalue %struct.float4 %32, 3
store float %40, float* %39, align 4
%41 = load %struct.float4, %struct.float4* %1, align 4
ret %struct.float4 %41
}
; Demangled: Eigen::TensorEvaluator<const Eigen::TensorCwiseBinaryOp<
;                Eigen::internal::scalar_sqrt_gradient_op<float>,
;                const Eigen::TensorMap<Eigen::Tensor<const float, 1, 1, int>, 16, Eigen::MakePointer>,
;                const Eigen::TensorMap<Eigen::Tensor<const float, 1, 1, long>, 16, Eigen::MakePointer>>,
;            Eigen::GpuDevice>::coeff(long) const -> float
;
; Scalar evaluation of the binary expression at position %index:
;   1. fetch the coefficient of the operand evaluator in field 1 of %this,
;   2. fetch the coefficient of the operand evaluator in field 2 of %this,
;   3. apply the functor stored in field 0 of %this to the two values.
;   %this  - binary-op evaluator (fields: 0 = functor, 1 and 2 = operand evaluators).
;   %index - coefficient position.
; NOTE(review): the field-1 operand is int-indexed, so %index is truncated from
; i64 to i32 before the call; high bits of a larger index are dropped.
define linkonce_odr float @_ZNK5Eigen15TensorEvaluatorIKNS_19TensorCwiseBinaryOpINS_8internal23scalar_sqrt_gradient_opIfEEKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS5_INS6_IS7_Li1ELi1ElEELi16ES9_EEEENS_9GpuDeviceEE5coeffEl(%"struct.Eigen::TensorEvaluator.8"* %this, i64 %index) #2 comdat align 2 {
; %1/%2 hold the spilled arguments; %3/%4 hold the two operand values so they
; can be passed to the functor by reference.
%1 = alloca %"struct.Eigen::TensorEvaluator.8"*, align 8
%2 = alloca i64, align 8
%3 = alloca float, align 4
%4 = alloca float, align 4
store %"struct.Eigen::TensorEvaluator.8"* %this, %"struct.Eigen::TensorEvaluator.8"** %1, align 8
store i64 %index, i64* %2, align 8
%5 = load %"struct.Eigen::TensorEvaluator.8"*, %"struct.Eigen::TensorEvaluator.8"** %1, align 8
; %6 = address of the functor (field 0), %7 = address of the first operand (field 1).
%6 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %5, i32 0, i32 0
%7 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %5, i32 0, i32 1
; First operand: index narrowed to i32 for the int-indexed evaluator.
%8 = load i64, i64* %2, align 8
%9 = trunc i64 %8 to i32
%10 = call float @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1EiEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEi(%"struct.Eigen::TensorEvaluator.3"* %7, i32 %9)
store float %10, float* %3, align 4
; Second operand (field 2): uses the full 64-bit index.
%11 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.8", %"struct.Eigen::TensorEvaluator.8"* %5, i32 0, i32 2
%12 = load i64, i64* %2, align 8
%13 = call float @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEl(%"struct.Eigen::TensorEvaluator.9"* %11, i64 %12)
store float %13, float* %4, align 4
; Apply scalar_sqrt_gradient_op::operator()(first, second) and return its result.
%14 = call float @_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfEclERKfS4_(%"struct.Eigen::internal::scalar_sqrt_gradient_op"* %6, float* dereferenceable(4) %3, float* dereferenceable(4) %4)
ret float %14
}
; Demangled: Eigen::internal::scalar_sqrt_gradient_op<float>::operator()(const float&, const float&) const -> float
;
; Computes (0.5 * *%output_gradient) / conj(*%output) and returns it.
; For float, numext::conj as defined in this module returns its argument
; unchanged, so the divisor is the value of *%output.
;   %this            - functor object; loaded into %4 but not otherwise used.
;   %output          - pointer to the first operand (used as the divisor).
;   %output_gradient - pointer to the second operand (scaled by 0.5).
; No check is made for a zero divisor.
; Function Attrs: inlinehint
define linkonce_odr float @_ZNK5Eigen8internal23scalar_sqrt_gradient_opIfEclERKfS4_(%"struct.Eigen::internal::scalar_sqrt_gradient_op"* %this, float* dereferenceable(4) %output, float* dereferenceable(4) %output_gradient) #4 comdat align 2 {
%1 = alloca %"struct.Eigen::internal::scalar_sqrt_gradient_op"*, align 8
%2 = alloca float*, align 8
%3 = alloca float*, align 8
%out_conj = alloca float, align 4
store %"struct.Eigen::internal::scalar_sqrt_gradient_op"* %this, %"struct.Eigen::internal::scalar_sqrt_gradient_op"** %1, align 8
store float* %output, float** %2, align 8
store float* %output_gradient, float** %3, align 8
%4 = load %"struct.Eigen::internal::scalar_sqrt_gradient_op"*, %"struct.Eigen::internal::scalar_sqrt_gradient_op"** %1, align 8
; out_conj = conj(output)
%5 = load float*, float** %2, align 8
%6 = call float @_ZN5Eigen6numext4conjIfEENS_8internal11conj_retvalINS2_36global_math_functions_filtering_baseIT_vE4typeEE4typeERKS5_(float* dereferenceable(4) %5)
store float %6, float* %out_conj, align 4
; result = (0.5 * output_gradient) / out_conj
%7 = load float*, float** %3, align 8
%8 = load float, float* %7, align 4
%9 = fmul float 5.000000e-01, %8
%10 = load float, float* %out_conj, align 4
%11 = fdiv float %9, %10
ret float %11
}
; Demangled: Eigen::TensorEvaluator<const Eigen::TensorMap<Eigen::Tensor<const float, 1, 1, long>, 16,
;            Eigen::MakePointer>, Eigen::GpuDevice>::coeff(long) const -> float
;
; Returns the float at element %index of the float* stored in field 0 of %this.
;   %this  - evaluator object; only its field 0 (a float*) is read.
;   %index - element offset; no bounds check is performed here.
; Function Attrs: inlinehint nounwind
define linkonce_odr float @_ZNK5Eigen15TensorEvaluatorIKNS_9TensorMapINS_6TensorIKfLi1ELi1ElEELi16ENS_11MakePointerEEENS_9GpuDeviceEE5coeffEl(%"struct.Eigen::TensorEvaluator.9"* %this, i64 %index) #5 comdat align 2 {
%1 = alloca float*, align 8
%2 = alloca %"struct.Eigen::TensorEvaluator.9"*, align 8
%3 = alloca i64, align 8
store %"struct.Eigen::TensorEvaluator.9"* %this, %"struct.Eigen::TensorEvaluator.9"** %2, align 8
store i64 %index, i64* %3, align 8
; Element address = field0(this) + index, kept in the temporary %1.
%4 = load %"struct.Eigen::TensorEvaluator.9"*, %"struct.Eigen::TensorEvaluator.9"** %2, align 8
%5 = getelementptr inbounds %"struct.Eigen::TensorEvaluator.9", %"struct.Eigen::TensorEvaluator.9"* %4, i32 0, i32 0
%6 = load float*, float** %5, align 8
%7 = load i64, i64* %3, align 8
%8 = getelementptr inbounds float, float* %6, i64 %7
store float* %8, float** %1, align 8
; Load and return the element.
%9 = load float*, float** %1, align 8
%10 = load float, float* %9, align 4
ret float %10
}
; Demangled: Eigen::numext::conj<float>(const float&) -> float
;
; Thin forwarding wrapper: passes %x straight to
; Eigen::internal::conj_impl<float, false>::run and returns its result.
;   %x - pointer to the input scalar (dereferenceable for 4 bytes).
; Function Attrs: inlinehint
define linkonce_odr float @_ZN5Eigen6numext4conjIfEENS_8internal11conj_retvalINS2_36global_math_functions_filtering_baseIT_vE4typeEE4typeERKS5_(float* dereferenceable(4) %x) #4 comdat {
%1 = alloca float*, align 8
store float* %x, float** %1, align 8
%2 = load float*, float** %1, align 8
%3 = call float @_ZN5Eigen8internal9conj_implIfLb0EE3runERKf(float* dereferenceable(4) %2)
ret float %3
}
; Demangled: Eigen::internal::conj_impl<float, false>::run(const float&) -> float
;
; Identity for float: loads the value referenced by %x and returns it unchanged.
;   %x - pointer to the input scalar (dereferenceable for 4 bytes).
; Function Attrs: inlinehint nounwind
define linkonce_odr float @_ZN5Eigen8internal9conj_implIfLb0EE3runERKf(float* dereferenceable(4) %x) #5 comdat align 2 {
%1 = alloca float*, align 8
store float* %x, float** %1, align 8
%2 = load float*, float** %1, align 8
%3 = load float, float* %2, align 4
ret float %3
}
; Attribute groups referenced as #N on the function definitions, declarations
; and call sites of this module.
; Groups #0, #1, #2, #4 and #5 share the same string attributes (including
; "target-cpu"="sm_30") and differ only in their leading enum attributes:
;   #0 = nounwind            #1 = nounwind readnone     #2 = (none)
;   #4 = inlinehint          #5 = inlinehint nounwind
; Groups #3, #6 and #7 carry enum attributes only.
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind readnone }
attributes #4 = { inlinehint "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { inlinehint nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_30" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { argmemonly nounwind }
attributes #7 = { nounwind }
; Module-level metadata.
; !nvvm.annotations lists four entries covering two EigenMetaKernel
; instantiations (the scalar_sqrt_op assignment, taking an i32 size, and the
; scalar_sqrt_gradient_op assignment, taking an i64 size). Each function gets
; one "kernel" = 1 entry and one "maxntidx" = 1024 entry.
; NOTE(review): per the NVVM IR convention, "kernel" marks a function as a
; kernel entry point and "maxntidx" bounds the block size in x - confirm
; against the NVVM IR specification for the toolchain in use.
!nvvm.annotations = !{!0, !1, !2, !3}
!llvm.module.flags = !{!4}
!llvm.ident = !{!5}
!0 = !{void (%"struct.Eigen::TensorEvaluator"*, i32)* @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_, !"kernel", i32 1}
!1 = !{void (%"struct.Eigen::TensorEvaluator"*, i32)* @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_18TensorCwiseUnaryOpINS0_14scalar_sqrt_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEEEEENS_9GpuDeviceEEEiEEvT_T0_, !"maxntidx", i32 1024}
!2 = !{void (%"struct.Eigen::TensorEvaluator.7"*, i64)* @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElEEvT_T0_, !"kernel", i32 1}
!3 = !{void (%"struct.Eigen::TensorEvaluator.7"*, i64)* @_ZN5Eigen8internal15EigenMetaKernelINS_15TensorEvaluatorIKNS_14TensorAssignOpINS_9TensorMapINS_6TensorIfLi1ELi1EiEELi16ENS_11MakePointerEEEKNS_19TensorCwiseBinaryOpINS0_23scalar_sqrt_gradient_opIfEEKNS4_INS5_IKfLi1ELi1EiEELi16ES7_EEKNS4_INS5_ISC_Li1ELi1ElEELi16ES7_EEEEEENS_9GpuDeviceEEElEEvT_T0_, !"maxntidx", i32 1024}
; Module flag: PIC level 2.
!4 = !{i32 1, !"PIC Level", i32 2}
; Producer identification string.
!5 = !{!"clang version 3.8.0-2ubuntu4 (tags/RELEASE_380/final)"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment