Last active
October 22, 2020 11:45
-
-
Save bave/d65382f6f0641489f3d603265edc7e2b to your computer and use it in GitHub Desktop.
tmp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Compacts the lanes of `src` selected by `mask` and stores them contiguously
/// starting at `array`.
///
/// Bit `i` of `mask` selects lane `i` of `src`. The selected lanes are written,
/// in ascending lane order, to `array[0..k]` where `k = mask.count_ones()`.
/// Memory at `array[k..]` is neither read nor written.
///
/// # Safety
///
/// * The executing CPU must support AVX2, BMI2 and POPCNT.
/// * `array` must be valid for writes of `mask.count_ones()` consecutive `f32`
///   values.
// Requires +avx2, +popcnt, +bmi2 (AVX alone is not enough: the byte-to-dword
// widening and the cross-lane permute used below are AVX2 instructions).
#[target_feature(enable = "avx2,bmi2,popcnt")]
unsafe fn mm256_compressstroeu_ps(array: *mut f32, mask: u8, src: __m256) {
    use std::arch::x86_64::{
        _mm256_cmpgt_epi32, _mm256_maskstore_ps, _mm256_set1_epi32, _mm256_setr_epi32,
    };

    // Deposit each mask bit into the lowest bit of its own byte, then multiply
    // by 0xFF to widen every set bit into a full 0xFF byte (no carries occur
    // because each byte is either 0x00 or 0x01 before the multiply).
    let byte_select = _pdep_u64(u64::from(mask), 0x0101_0101_0101_0101) * 0xFF;

    // Pull the selected lane numbers (one per byte) down to the low end, so
    // byte `j` holds the source lane that belongs at output position `j`.
    // Unused high bytes come out as 0.
    let packed_indices = _pext_u64(0x0706_0504_0302_0100, byte_select);

    // Widen the eight byte indices to eight 32-bit indices and gather.
    let permute = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(packed_indices as i64));
    let compressed = _mm256_permutevar8x32_ps(src, permute);

    // Enable exactly the first `count` lanes for the store: lane `i` is
    // written iff `count > i`. At most 8, so the cast to i32 is lossless.
    let count = mask.count_ones() as i32;
    let lane_ids = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let store_mask = _mm256_cmpgt_epi32(_mm256_set1_epi32(count), lane_ids);

    // A masked store leaves every disabled lane's memory untouched, so the
    // function never accesses anything past the compacted elements.
    _mm256_maskstore_ps(array, store_mask, compressed);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment