Last active
February 10, 2020 22:16
-
-
Save attilaz/eb629b22346d2d2e3652b965887da215 to your computer and use it in GitHub Desktop.
simd function name changes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For the shuffles, should we somehow indicate that the data is handled as 32-bit x 4, | |
# in case we later want shuffles that operate on 16-bit and 8-bit values? | |
Ty simd_shuf_xyAB(Ty _a, Ty _b); -> simd_shuf_v32x4_xyAB ??? | |
Ty simd_shuf_ABxy(Ty _a, Ty _b); | |
Ty simd_shuf_CDzw(Ty _a, Ty _b); | |
Ty simd_shuf_zwCD(Ty _a, Ty _b); | |
Ty simd_shuf_xAyB(Ty _a, Ty _b); | |
Ty simd_shuf_AxBy(Ty _a, Ty _b); | |
Ty simd_shuf_zCwD(Ty _a, Ty _b); | |
Ty simd_shuf_CzDw(Ty _a, Ty _b); | |
float simd_x(Ty _a); -> simd_f32_x | |
float simd_y(Ty _a); -> simd_f32_y | |
float simd_z(Ty _a); -> simd_f32_z | |
float simd_w(Ty _a); -> simd_f32_w | |
Ty simd_ld(const void* _ptr); #no change | |
void simd_st(void* _ptr, Ty _a); #no change | |
void simd_stx(void* _ptr, Ty _a); #no change | |
void simd_stream(void* _ptr, Ty _a); #no change | |
Ty simd_ld(float _x, float _y, float _z, float _w); -> simd_f32_ld | |
Ty simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d); -> simd_f32_ld | |
Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w); -> simd_i32_ld or simd_u32_ld ??? | |
Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, | |
uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d); -> simd_i32_ld or simd_u32_ld ??? | |
Ty simd_splat(const void* _ptr); #no change | |
Ty simd_splat(float _a); -> simd_f32_splat | |
Ty simd_isplat(uint32_t _a); -> simd_i32_splat or simd_u32_splat ??? | |
Ty simd_zero(); #no change | |
Ty simd_itof(Ty _a); -> simd_i32_to_f32 ??? | |
Ty simd_ftoi(Ty _a); -> simd_f32_to_i32 ?? | |
Ty simd_round(Ty _a); -> simd_f32_round | |
Ty simd_add(Ty _a, Ty _b); -> simd_f32_add | |
Ty simd_sub(Ty _a, Ty _b); -> simd_f32_sub | |
Ty simd_mul(Ty _a, Ty _b); -> simd_f32_mul | |
Ty simd_div(Ty _a, Ty _b); -> simd_f32_div | |
Ty simd_rcp_est(Ty _a); -> simd_f32_rcp_est | |
Ty simd_sqrt(Ty _a); -> simd_f32_sqrt | |
Ty simd_rsqrt_est(Ty _a); -> simd_f32_rsqrt_est | |
Ty simd_dot3(Ty _a, Ty _b); -> simd_f32_dot3 | |
Ty simd_dot(Ty _a, Ty _b); -> simd_f32_dot | |
Ty simd_cmpeq(Ty _a, Ty _b); -> simd_f32_cmpeq | |
Ty simd_cmplt(Ty _a, Ty _b); -> simd_f32_cmplt | |
Ty simd_cmple(Ty _a, Ty _b); -> simd_f32_cmple | |
Ty simd_cmpgt(Ty _a, Ty _b); -> simd_f32_cmpgt | |
Ty simd_cmpge(Ty _a, Ty _b); -> simd_f32_cmpge | |
Ty simd_min(Ty _a, Ty _b); -> simd_f32_min | |
Ty simd_max(Ty _a, Ty _b); -> simd_f32_max | |
Ty simd_and(Ty _a, Ty _b); # no change | |
Ty simd_andc(Ty _a, Ty _b); # no change | |
Ty simd_or(Ty _a, Ty _b); # no change | |
Ty simd_xor(Ty _a, Ty _b); # no change | |
Ty simd_sll(Ty _a, int _count); -> simd_i32_sll or simd_u32_sll ??? | |
Ty simd_srl(Ty _a, int _count); -> simd_i32_srl or simd_u32_srl ??? | |
Ty simd_sra(Ty _a, int _count); -> simd_i32_sra or simd_u32_sra ??? | |
Ty simd_icmpeq(Ty _a, Ty _b); -> simd_i32_cmpeq | |
Ty simd_icmplt(Ty _a, Ty _b); -> simd_i32_cmplt | |
Ty simd_icmpgt(Ty _a, Ty _b); -> simd_i32_cmpgt | |
Ty simd_imin(Ty _a, Ty _b); -> simd_i32_min | |
Ty simd_imax(Ty _a, Ty _b); -> simd_i32_max | |
Ty simd_iadd(Ty _a, Ty _b); -> simd_i32_add | |
Ty simd_isub(Ty _a, Ty _b); -> simd_i32_sub | |
Ty simd_shuf_xAzC(Ty _a, Ty _b); -> ??? #no change or simd_shuf_v32x4_xAzC | |
Ty simd_shuf_yBwD(Ty _a, Ty _b); | |
Ty simd_rcp(Ty _a); -> simd_f32_rcp | |
Ty simd_orx(Ty _a); #no change ??? | |
Ty simd_orc(Ty _a, Ty _b); #no change ??? | |
Ty simd_neg(Ty _a); -> simd_f32_neg | |
Ty simd_madd(Ty _a, Ty _b, Ty _c); -> simd_f32_madd | |
Ty simd_nmsub(Ty _a, Ty _b, Ty _c); -> simd_f32_nmsub | |
Ty simd_div_nr(Ty _a, Ty _b); -> simd_f32_div_nr | |
Ty simd_selb(Ty _mask, Ty _a, Ty _b); #no change or simd_i32_selb | |
Ty simd_sels(Ty _test, Ty _a, Ty _b); #no change or simd_i32_sels | |
Ty simd_not(Ty _a); #no change | |
Ty simd_abs(Ty _a); -> simd_f32_abs | |
Ty simd_clamp(Ty _a, Ty _min, Ty _max); -> simd_f32_clamp | |
Ty simd_lerp(Ty _a, Ty _b, Ty _s); -> simd_f32_lerp | |
Ty simd_rsqrt(Ty _a); -> simd_f32_rsqrt | |
Ty simd_rsqrt_nr(Ty _a); -> simd_f32_rsqrt_nr | |
Ty simd_rsqrt_carmack(Ty _a); -> simd_f32_rsqrt_carmack | |
Ty simd_sqrt_nr(Ty _a); -> simd_f32_sqrt_nr | |
Ty simd_log2(Ty _a); -> simd_f32_log2 | |
Ty simd_exp2(Ty _a); -> simd_f32_exp2 | |
Ty simd_pow(Ty _a, Ty _b); -> simd_f32_pow | |
Ty simd_cross3(Ty _a, Ty _b); -> simd_f32_cross3 | |
Ty simd_normalize3(Ty _a); -> simd_f32_normalize3 | |
Ty simd_ceil(Ty _a); -> simd_f32_ceil | |
Ty simd_floor(Ty _a); -> simd_f32_floor | |
bool simd_test_any_ni(Ty _a); # no change | |
bool simd_test_all_ni(Ty _a); # no change | |
extra functions that nudge has: | |
sse : _mm_movemask_ps (highest bits of 32 bit components to int 4bit bitmask) | |
bx: simd_i32_mask or simd_i32_test_mask ??? (assumes that the input is the result of a compare) | |
bx: simd_i32_sign_mask ??? | |
sse: _mm_castps_si128 | |
// _mm_movemask_epi8: Create mask from the most significant bit of each 8-bit element in a, and store the result in dst. | |
bx: simd_i8_mask | |
sse: _mm_packs_epi32 (convert 2 4xint32 -> 8xint16 with signed saturation) | |
bx: simd_pack_i32_to_i16 ??? | |
sse: _mm_packs_epi16 (convert 2 8xint16 -> 16xint8 with signed saturation) | |
bx: simd_pack_i16_to_i8 ?? | |
sse: _mm_unpacklo_epi16 | |
neon: vzip1q_s16 | |
Unpack and interleave 16 bit integers from the low half of a and b, and store the results in dst. | |
bx: simd_????? | |
simd_i16_add | |
simd_i16_cmpeq | |
simd_i16_srl | |
I guess add/sub, cmpXXX and min/max should also be added for i16 and i8. What about u32/u16/u8? | |
Shifts should be added for i16 and i8 as well. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment