Last active
January 19, 2021 09:42
-
-
Save XiangpengHao/ddd63d6f6dc60d701583aae4c838787f to your computer and use it in GitHub Desktop.
Code used for testing clwb instruction. https://blog.haoxp.xyz/posts/is-clwb-implemented/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <x86intrin.h>

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>

#include <glog/logging.h>
using namespace std::chrono; | |
// TIME_BODY(name, body): executes `body` exactly once and prints
// "<name>: <elapsed seconds>" to std::cout, followed by std::endl.
// `body` runs inside its own do/while(false), so a `break` or `continue`
// in it only leaves the timed region; the elapsed time is still printed.
#define TIME_BODY(name, body)                                                \
  do {                                                                       \
    const auto time_body_t0 = std::chrono::high_resolution_clock::now();     \
    do {                                                                     \
      body                                                                   \
    } while (false);                                                         \
    const auto time_body_t1 = std::chrono::high_resolution_clock::now();     \
    const std::chrono::duration<double> time_body_seconds =                  \
        time_body_t1 - time_body_t0;                                         \
    std::cout << name << ": " << time_body_seconds.count() << std::endl;     \
  } while (false)
/* Benchmark buffer size in bytes (20 MiB); chosen so the whole array is
 * expected to fit in the last-level cache of the machine under test. */
constexpr uint64_t kArraySize = 20 * 1024 * 1024;
/* Number of 64-bit elements held by the buffer. */
constexpr uint64_t kArrayLen = kArraySize / sizeof(uint64_t);
/* Stride, in bytes, used when walking the buffer one cache line at a time. */
constexpr uint64_t kCacheLineSize = 64;
// Issues _mm_clwb on every kCacheLineSize-byte step of the kArraySize-byte
// buffer starting at `array`, then executes a single _mm_mfence.
void clwb_array(uint64_t *array) {
  char *line = reinterpret_cast<char *>(array);
  char *const limit = line + kArraySize;
  while (line < limit) {
    _mm_clwb(line);
    line += kCacheLineSize;
  }
  _mm_mfence();
}
// Issues _mm_clflush on every kCacheLineSize-byte step of the kArraySize-byte
// buffer starting at `array`, then executes a single _mm_mfence.
void clflush_array(uint64_t *array) {
  char *line = reinterpret_cast<char *>(array);
  char *const limit = line + kArraySize;
  while (line < limit) {
    _mm_clflush(line);
    line += kCacheLineSize;
  }
  _mm_mfence();
}
// Issues _mm_clflushopt on every kCacheLineSize-byte step of the
// kArraySize-byte buffer starting at `array`, then executes a single
// _mm_mfence.
void clflushopt_array(uint64_t *array) {
  char *line = reinterpret_cast<char *>(array);
  char *const limit = line + kArraySize;
  while (line < limit) {
    _mm_clflushopt(line);
    line += kCacheLineSize;
  }
  _mm_mfence();
}
// Allocates the benchmark buffer, fills it, and flushes it.
//
// The buffer is kArraySize bytes, aligned to kCacheLineSize, and element i is
// initialised to the value i. After filling, clflush_array() is run over the
// whole buffer.
//
// Returns a pointer to the buffer. The memory comes from posix_memalign, so
// the caller releases it with free(). If the allocation fails the process
// prints a diagnostic to std::cerr and aborts; the function never returns
// nullptr.
uint64_t *prepare_array() {
  void *raw = nullptr;
  // posix_memalign reports failure through its return value (an errno-style
  // code), not through errno, and leaves `raw` unspecified on failure.
  const int rc = posix_memalign(&raw, kCacheLineSize, kArraySize);
  if (rc != 0 || raw == nullptr) {
    std::cerr << "posix_memalign of " << kArraySize
              << " bytes failed with error " << rc << std::endl;
    std::abort();
  }

  uint64_t *array = static_cast<uint64_t *>(raw);
  for (uint64_t i = 0; i < kArrayLen; ++i) {
    array[i] = i;
  }
  clflush_array(array);
  return array;
}
// Walks the whole buffer (kArrayLen 64-bit elements) two elements at a time
// with aligned 128-bit loads (_mm_load_si128), accumulating a lane-wise 64-bit
// sum. Returns lane 0 of the accumulator, i.e. the wrapping sum of the
// even-indexed elements; callers use it as a checksum.
// `array` must be 16-byte aligned, as required by _mm_load_si128.
uint64_t read(uint64_t *array) {
  __m128i acc = _mm_setzero_si128();
  uint64_t offset = 0;
  while (offset < kArrayLen) {
    __m128i *const chunk = reinterpret_cast<__m128i *>(array + offset);
    acc = _mm_add_epi64(acc, _mm_load_si128(chunk));
    offset += 2;
  }
  return _mm_extract_epi64(acc, 0);
}
// Same traversal and checksum as read(), but each 128-bit chunk is fetched
// with the non-temporal load intrinsic _mm_stream_load_si128. Returns lane 0
// of the lane-wise 64-bit sum (the wrapping sum of the even-indexed elements).
// `array` must be 16-byte aligned, as required by _mm_stream_load_si128.
uint64_t read_nt(uint64_t *array) {
  __m128i acc = _mm_setzero_si128();
  uint64_t offset = 0;
  while (offset < kArrayLen) {
    __m128i *const chunk = reinterpret_cast<__m128i *>(array + offset);
    acc = _mm_add_epi64(acc, _mm_stream_load_si128(chunk));
    offset += 2;
  }
  return _mm_extract_epi64(acc, 0);
}
int main() { | |
uint64_t *array = prepare_array(); | |
uint64_t dummy = read(array); | |
TIME_BODY("read:cache", { LOG_IF(FATAL, read_nt(array) != dummy); }); | |
TIME_BODY("read_nt:cache", { LOG_IF(FATAL, read_nt(array) != dummy); }); | |
clwb_array(array); | |
TIME_BODY("read:clwb", { LOG_IF(FATAL, read(array) != dummy); }); | |
clwb_array(array); | |
TIME_BODY("read_nt:clwb", { LOG_IF(FATAL, read_nt(array) != dummy); }); | |
clflushopt_array(array); | |
TIME_BODY("read:clflushopt", { LOG_IF(FATAL, read(array) != dummy); }); | |
clflushopt_array(array); | |
TIME_BODY("read_nt:clflushopt", { LOG_IF(FATAL, read_nt(array) != dummy); }); | |
clflush_array(array); | |
TIME_BODY("read:clflush", { LOG_IF(FATAL, read(array) != dummy); }); | |
clflush_array(array); | |
TIME_BODY("read_nt:clflush", { LOG_IF(FATAL, read_nt(array) != dummy); }); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment