Last active
April 25, 2023 03:05
-
-
Save rob-p/b14855ed2337ded9e3f0fb12d6b62390 to your computer and use it in GitHub Desktop.
ChatGPT AVX2 LCP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> // for AVX2 intrinsics | |
#include <cstring> // for strlen | |
size_t longestCommonPrefixAVX2(const char* str1, const char* str2) { | |
const size_t len1 = strlen(str1); | |
const size_t len2 = strlen(str2); | |
const size_t len = std::min(len1, len2); | |
const __m256i* p1 = reinterpret_cast<const __m256i*>(str1); | |
const __m256i* p2 = reinterpret_cast<const __m256i*>(str2); | |
const __m256i* end = reinterpret_cast<const __m256i*>(str1 + (len & ~0x1F)); // round down to nearest multiple of 32 bytes | |
for (; p1 < end; p1++, p2++) { | |
__m256i v1 = _mm256_loadu_si256(p1); | |
__m256i v2 = _mm256_loadu_si256(p2); | |
__m256i cmp = _mm256_cmpeq_epi8(v1, v2); | |
int mask = _mm256_movemask_epi8(cmp); | |
if (mask != 0xFFFFFFFF) { | |
// found a mismatch, determine the position of the first differing byte | |
int pos = __builtin_ctz(~mask); | |
return (reinterpret_cast<const char*>(p1) - str1) + pos; | |
} | |
} | |
// handle the remaining bytes (less than 32) | |
for (; reinterpret_cast<const char*>(p1) < str1 + len; p1++, p2++) { | |
if (*reinterpret_cast<const char*>(p1) != *reinterpret_cast<const char*>(p2)) { | |
return reinterpret_cast<const char*>(p1) - str1; | |
} | |
} | |
// strings are identical up to the length of the shorter string | |
return len; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment