Created
February 16, 2020 18:45
-
-
Save glampert/6695d76f614c3de71870fd5921be79fe to your computer and use it in GitHub Desktop.
Silly memset function with optimizations for 4-, 8-, and 16-byte aligned types.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
#include <cstdint> | |
#include <cassert> | |
#include <xmmintrin.h> | |
/////////////////////////////////////////////////////////////////////////////// | |
namespace detail
{

/// Returns true when the address held by `p` is a multiple of `alignment`.
///
/// @param p          Pointer whose address is tested (never dereferenced).
/// @param alignment  Required alignment in bytes; must be a non-zero power
///                   of two, otherwise the bit-mask test is meaningless.
static inline bool IsAligned(const void* p, const size_t alignment)
{
    assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
    return (reinterpret_cast<uintptr_t>(p) & (alignment - 1)) == 0;
}

/// Builds a value of type T in which every byte equals `fillValue`.
///
/// @tparam T  Type to fill; it is value-initialized and then overwritten
///            byte by byte.
template<typename T>
static inline T Expand(const uint8_t fillValue)
{
    // Write the bytes of a real T through a byte pointer. Going the other
    // way (filling a byte array and reading it back through a T*) violates
    // the strict-aliasing rule.
    T expanded{};
    auto* pBytes = reinterpret_cast<unsigned char*>(&expanded);
    for (size_t i = 0; i < sizeof(T); ++i)
    {
        pBytes[i] = fillValue;
    }
    return expanded;
}

/// Writes `fillValue` into `sizeBytes` consecutive bytes starting at `pDest`,
/// one byte per iteration. Places no alignment requirement on `pDest`.
static inline void MemsetUnaligned(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
{
    auto* pDestBytes = static_cast<uint8_t*>(pDest);
    for (size_t i = 0; i < sizeBytes; ++i)
    {
        *pDestBytes++ = fillValue;
    }
}

/// Fills `sizeBytes` bytes at `pDest` with `fillValue`, storing whole T-sized
/// elements first and finishing the tail one byte at a time.
///
/// @tparam T         Element type used for the wide stores.
/// @param pDest      Destination buffer; it is written through a T*, so the
///                   caller is expected to have checked its alignment for T.
/// @param fillValue  Byte value to replicate.
/// @param sizeBytes  Total number of bytes to fill.
template<typename T>
static inline void MemsetAlignedT(void* pDest, const uint8_t fillValue, const size_t sizeBytes)
{
    const size_t numElements = sizeBytes / sizeof(T);
    const size_t remainder   = sizeBytes % sizeof(T);

    auto* pDestElements = static_cast<T*>(pDest);
    if (numElements != 0)
    {
        const T fillValExpanded = Expand<T>(fillValue);
        for (size_t i = 0; i < numElements; ++i)
        {
            *pDestElements++ = fillValExpanded;
        }
    }

    // Remaining bytes < sizeof T.
    if (remainder != 0)
    {
        // pDestElements now points one past the last whole element written
        // (or at pDest when there were none), which is where the tail begins.
        // Starting the byte fill at pDest instead would leave the last
        // `remainder` bytes of the buffer untouched.
        MemsetUnaligned(pDestElements, fillValue, remainder);
    }
}

} // detail
void Memset(void* pDest, const uint8_t fillValue, const size_t sizeBytes) | |
{ | |
if (detail::IsAligned(pDest, sizeof(__m128))) // Set 16 bytes at a time | |
{ | |
detail::MemsetAlignedT<__m128>(pDest, fillValue, sizeBytes); | |
} | |
else if (detail::IsAligned(pDest, sizeof(uint64_t))) // Set 8 bytes at a time | |
{ | |
detail::MemsetAlignedT<uint64_t>(pDest, fillValue, sizeBytes); | |
} | |
else if (detail::IsAligned(pDest, sizeof(uint32_t))) // Set 4 bytes at a time | |
{ | |
detail::MemsetAlignedT<uint32_t>(pDest, fillValue, sizeBytes); | |
} | |
else // Unaligned - one byte at a time | |
{ | |
detail::MemsetUnaligned(pDest, fillValue, sizeBytes); | |
} | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
/// Asserts that each of the sizeof(T) bytes of the object at `pTestVal`
/// equals `fillValue`.
///
/// @tparam T         Type of the inspected object; it fixes how many bytes
///                   are examined.
/// @param pTestVal   Object to inspect.
/// @param fillValue  Byte value every inspected byte must hold.
template<typename T>
static void CheckBytes(const T* pTestVal, const uint8_t fillValue)
{
    const uint8_t* const pBytes = reinterpret_cast<const uint8_t*>(pTestVal);

    size_t i = 0;
    while (i != sizeof(T))
    {
        assert(pBytes[i] == fillValue);
        ++i;
    }
}
int main() | |
{ | |
const uint8_t fillValue = 0xAB; | |
uint8_t i8Val = {}; | |
Memset(&i8Val, fillValue, sizeof(i8Val)); | |
assert(i8Val == 0xAB); | |
uint16_t i16Val = {}; | |
Memset(&i16Val, fillValue, sizeof(i16Val)); | |
assert(i16Val == 0xABAB); | |
uint32_t i32Val = {}; | |
Memset(&i32Val, fillValue, sizeof(i32Val)); | |
assert(i32Val == 0xABABABAB); | |
uint64_t i64Val = {}; | |
Memset(&i64Val, fillValue, sizeof(i64Val)); | |
assert(i64Val == 0xABABABABABABABAB); | |
__m128 i128Val = {}; | |
Memset(&i128Val, fillValue, sizeof(i128Val)); | |
CheckBytes(&i128Val, fillValue); | |
char unaligned1[128] = {}; | |
Memset(&unaligned1, fillValue, sizeof(unaligned1)); | |
CheckBytes(unaligned1, fillValue); | |
char unaligned2[10] = {}; | |
Memset(&unaligned2, fillValue, sizeof(unaligned2)); | |
CheckBytes(unaligned2, fillValue); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment