Last active
April 19, 2024 07:53
-
-
Save Wunkolo/b715746f1599acf2c7943f9bcd2ef1fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdint> | |
#include <cstdio> | |
#include <bitset> | |
#include <immintrin.h> | |
// Attempts at implementing _mm_srai_epi8, _mm_slli_epi8, and _mm_srli_epi8
// using affine Galois field transformations (_mm_gf2p8affine_epi64_epi8, GFNI)
// Wed Nov 4 05:34:35 PM PST 2020 - [email protected]
/// Arithmetic right shift of every signed 8-bit lane of `a` by `imm8` bits,
/// implemented with a single GFNI affine transform (GF2P8AFFINEQB).
///
/// @param a     Sixteen packed 8-bit lanes.
/// @param imm8  Shift count in bits. Counts of 8 or more saturate: every lane
///              becomes 0x00 or 0xFF depending on its sign bit, matching the
///              behaviour of the wider `_mm_srai_epi16/32` intrinsics.
/// @return      The shifted lanes.
///
/// On GCC/Clang GFNI is enabled for this function only, so it also builds in
/// translation units compiled without -mgfni. The executing CPU must still
/// support GFNI.
#if defined(__GNUC__)
__attribute__((target("gfni,sse2")))
#endif
inline __m128i _mm_srai_epi8(__m128i a, std::uint8_t imm8)
{
    // Identity bit-matrix for the affine transform: each byte of the 64-bit
    // matrix selects which input bits feed one output bit.
    constexpr std::uint64_t identity = 0x0102040810204080ull;
    // A matrix row of 0x80 makes the corresponding output bit a copy of the
    // input's most significant (sign) bit.
    constexpr std::uint64_t sign_rows = 0x8080808080808080ull;

    std::uint64_t matrix = sign_rows; // imm8 >= 8: every output bit is the sign bit
    if (imm8 < 8)
    {
        // Shifting the matrix by whole bytes shifts each lane by bits.
        // The guard keeps the shift count below 64; the unsigned operands keep
        // the left shift well defined in every language version.
        const unsigned row_shift = imm8 * 8u;
        // Logical shift right: the rows for the vacated high bits become zero.
        const std::uint64_t logical = identity << row_shift;
        // Fill the vacated rows with 0x80 so the sign bit is shifted in instead.
        //   equivalent BMI2 form: _bzhi_u64(sign_rows, row_shift)
        const std::uint64_t sign_extend = sign_rows & ~(~0ull << row_shift);
        matrix = logical | sign_extend;
    }

    return _mm_gf2p8affine_epi64_epi8(
        a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
    );
}
/// Logical left shift of every 8-bit lane of `a` by `imm8` bits, implemented
/// with a single GFNI affine transform (GF2P8AFFINEQB).
///
/// @param a     Sixteen packed 8-bit lanes.
/// @param imm8  Shift count in bits. Counts of 8 or more yield all-zero lanes,
///              matching the behaviour of the wider `_mm_slli_epi16/32`
///              intrinsics.
/// @return      The shifted lanes.
///
/// On GCC/Clang GFNI is enabled for this function only, so it also builds in
/// translation units compiled without -mgfni. The executing CPU must still
/// support GFNI.
#if defined(__GNUC__)
__attribute__((target("gfni,sse2")))
#endif
inline __m128i _mm_slli_epi8(__m128i a, std::uint8_t imm8)
{
    // Identity bit-matrix for the affine transform; moving its rows down by
    // one byte moves every lane's bits up by one position.
    constexpr std::uint64_t identity = 0x0102040810204080ull;

    // A shift count of 64 or more is undefined for a 64-bit operand, so counts
    // that clear the whole lane map to the all-zero matrix explicitly.
    const std::uint64_t matrix = (imm8 < 8) ? (identity >> (imm8 * 8u)) : 0ull;

    return _mm_gf2p8affine_epi64_epi8(
        a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
    );
}
/// Logical right shift of every 8-bit lane of `a` by `imm8` bits, implemented
/// with a single GFNI affine transform (GF2P8AFFINEQB).
///
/// @param a     Sixteen packed 8-bit lanes.
/// @param imm8  Shift count in bits. Counts of 8 or more yield all-zero lanes,
///              matching the behaviour of the wider `_mm_srli_epi16/32`
///              intrinsics.
/// @return      The shifted lanes.
///
/// On GCC/Clang GFNI is enabled for this function only, so it also builds in
/// translation units compiled without -mgfni. The executing CPU must still
/// support GFNI.
#if defined(__GNUC__)
__attribute__((target("gfni,sse2")))
#endif
inline __m128i _mm_srli_epi8(__m128i a, std::uint8_t imm8)
{
    // Identity bit-matrix for the affine transform; moving its rows up by one
    // byte moves every lane's bits down by one position. The constant is
    // unsigned so that shifting bits out of the top is well defined.
    constexpr std::uint64_t identity = 0x0102040810204080ull;

    // A shift count of 64 or more is undefined for a 64-bit operand, so counts
    // that clear the whole lane map to the all-zero matrix explicitly.
    const std::uint64_t matrix = (imm8 < 8) ? (identity << (imm8 * 8u)) : 0ull;

    return _mm_gf2p8affine_epi64_epi8(
        a, _mm_set1_epi64x(static_cast<long long>(matrix)), 0
    );
}
/// Dumps the sixteen 8-bit lanes of `Vector` to stdout on two tab-indented
/// lines: first each lane as eight binary digits, then each lane as a signed
/// decimal. Every lane is followed by a `'` separator.
void Print128(const __m128i& Vector)
{
    // Copy the lanes into a plain byte array so they can be walked one by one.
    std::int8_t Lanes[16];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(Lanes), Vector);

    // Row 1: binary representation of each lane.
    std::putchar('\t');
    for (const std::int8_t Lane : Lanes)
    {
        const std::bitset<8> Bits(static_cast<std::uint8_t>(Lane));
        std::printf("%s'", Bits.to_string().c_str());
    }
    std::putchar('\n');

    // Row 2: signed decimal value of each lane.
    std::putchar('\t');
    for (const std::int8_t Lane : Lanes)
    {
        std::printf("%d'", Lane);
    }
    std::putchar('\n');
}
int main() | |
{ | |
{ | |
const __m128i TestVector = _mm_set1_epi8(0b10010000); | |
std::puts("Input"); Print128(TestVector); | |
std::puts("SRA(3)"); Print128(_mm_srai_epi8(TestVector,3)); | |
std::puts("SLL(3)"); Print128(_mm_slli_epi8(TestVector,3)); | |
std::puts("SRL(3)"); Print128(_mm_srli_epi8(TestVector,3)); | |
} | |
} | |
// Input | |
// 10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000'10010000' | |
// -112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112'-112' | |
// SRA(3) | |
// 11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010'11110010' | |
// -14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14'-14' | |
// SLL(3) | |
// 10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000'10000000' | |
// -128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128'-128' | |
// SRL(3) | |
// 00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010'00010010' | |
// 18'18'18'18'18'18'18'18'18'18'18'18'18'18'18'18' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment