Skip to content

Instantly share code, notes, and snippets.

@XiangpengHao
Last active January 19, 2021 09:42
Show Gist options
  • Save XiangpengHao/ddd63d6f6dc60d701583aae4c838787f to your computer and use it in GitHub Desktop.
Save XiangpengHao/ddd63d6f6dc60d701583aae4c838787f to your computer and use it in GitHub Desktop.
Code used for testing the clwb instruction. https://blog.haoxp.xyz/posts/is-clwb-implemented/
#include <x86intrin.h>

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>

#include <glog/logging.h>
using namespace std::chrono;
/*
 * TIME_BODY(name, statements...)
 *
 * Executes the given statements exactly once and prints
 * "<name>: <elapsed seconds>" to std::cout, where the elapsed time is a
 * floating-point number of seconds.
 *
 * Notes:
 *  - Timing uses std::chrono::steady_clock, which is monotonic, so the
 *    measured interval cannot be distorted by wall-clock adjustments.
 *  - All names are fully qualified, so the macro does not depend on a
 *    `using namespace std::chrono;` being in effect at the call site.
 *  - The macro is variadic so that a top-level comma inside the statements
 *    (e.g. `int a, b;`) does not split the macro argument.
 *  - The internal locals carry a `time_body_` prefix so that statements
 *    referring to an outer variable called `start` or `end` still see the
 *    caller's variable.
 *  - The statements run inside their own `do { } while (false)`, so a
 *    `break` in them only leaves the timed region.
 */
#define TIME_BODY(name, ...)                                                \
  do {                                                                      \
    const auto time_body_start_ = std::chrono::steady_clock::now();         \
    do {                                                                    \
      __VA_ARGS__                                                           \
    } while (false);                                                        \
    const auto time_body_end_ = std::chrono::steady_clock::now();           \
    std::cout << (name) << ": "                                             \
              << std::chrono::duration_cast<std::chrono::duration<double>>( \
                     time_body_end_ - time_body_start_)                     \
                     .count()                                               \
              << std::endl;                                                 \
  } while (false)
/*
 * Geometry of the benchmark buffer.
 *
 * kCacheLineSize: step, in bytes, used when walking the buffer with the
 *                 cache-line flush / write-back helpers.
 * kArraySize:     buffer size in bytes (20 * 1024 * 1024). Per the original
 *                 author's note, a 20M array can fit into the LLC; whether it
 *                 does depends on the CPU, so verify on the target machine.
 * kArrayLen:      number of uint64_t elements held by the buffer.
 */
static constexpr uint64_t kCacheLineSize = 64;
static constexpr uint64_t kArraySize = 20 * 1024 * 1024;
static constexpr uint64_t kArrayLen = kArraySize / sizeof(uint64_t);
/*
 * Issues _mm_clwb (cache line write back) on every kCacheLineSize-byte step
 * of the kArraySize bytes starting at `array`, then executes _mm_mfence().
 *
 * NOTE(review): assumes `array` points to at least kArraySize bytes and is
 * cache-line aligned -- confirm against the allocation site.
 */
void clwb_array(uint64_t *array) {
  char *const first_byte = reinterpret_cast<char *>(array);
  char *const past_last_byte = first_byte + kArraySize;

  for (char *line = first_byte; line < past_last_byte; line += kCacheLineSize) {
    _mm_clwb(line);
  }

  // Fence after the whole sweep, not per line.
  _mm_mfence();
}
/*
 * Issues _mm_clflush (cache line flush) on every kCacheLineSize-byte step of
 * the kArraySize bytes starting at `array`, then executes _mm_mfence().
 *
 * NOTE(review): assumes `array` points to at least kArraySize bytes and is
 * cache-line aligned -- confirm against the allocation site.
 */
void clflush_array(uint64_t *array) {
  const char *const first_byte = reinterpret_cast<const char *>(array);
  const char *const past_last_byte = first_byte + kArraySize;

  for (const char *line = first_byte; line < past_last_byte;
       line += kCacheLineSize) {
    _mm_clflush(line);
  }

  // Fence after the whole sweep, not per line.
  _mm_mfence();
}
/*
 * Issues _mm_clflushopt (optimized cache line flush) on every
 * kCacheLineSize-byte step of the kArraySize bytes starting at `array`, then
 * executes _mm_mfence().
 *
 * NOTE(review): assumes `array` points to at least kArraySize bytes and is
 * cache-line aligned -- confirm against the allocation site.
 */
void clflushopt_array(uint64_t *array) {
  char *const first_byte = reinterpret_cast<char *>(array);
  char *const past_last_byte = first_byte + kArraySize;

  for (char *line = first_byte; line < past_last_byte; line += kCacheLineSize) {
    _mm_clflushopt(line);
  }

  // Fence after the whole sweep, not per line.
  _mm_mfence();
}
/*
 * Allocates the benchmark buffer and initializes it.
 *
 * The buffer is kArraySize bytes, aligned to kCacheLineSize, and element i is
 * set to the value i. After initialization every cache line of the buffer is
 * flushed with clflush_array() before the pointer is returned.
 *
 * Returns: pointer to the buffer. It was obtained from posix_memalign(), so
 *          the caller releases it with free().
 *
 * On allocation failure the error code is reported on std::cerr and the
 * process is aborted; the function never returns a null pointer.
 */
uint64_t *prepare_array() {
  // Allocate through a real void* instead of casting &array to void**, which
  // would store a void* into an object of a different pointer type.
  void *raw = nullptr;
  const int rc = posix_memalign(&raw, kCacheLineSize, kArraySize);
  if (rc != 0 || raw == nullptr) {
    // posix_memalign reports failure through its return value, not errno.
    std::cerr << "posix_memalign failed with error code " << rc << std::endl;
    std::abort();
  }

  uint64_t *array = static_cast<uint64_t *>(raw);
  for (uint64_t i = 0; i < kArrayLen; ++i) {
    array[i] = i;
  }

  // Flush the freshly written lines before handing the buffer out.
  clflush_array(array);
  return array;
}
/*
 * Reads the whole buffer with regular aligned 128-bit loads
 * (_mm_load_si128), two uint64_t elements per load, accumulating them with a
 * lane-wise 64-bit add.
 *
 * Returns lane 0 of the accumulator, i.e. the (wrapping) sum of the elements
 * at even indices. Callers use it as a checksum that also keeps the loads
 * from being optimized away.
 *
 * `array` must be 16-byte aligned and hold kArrayLen elements.
 */
uint64_t read(uint64_t *array) {
  const __m128i *cursor = reinterpret_cast<const __m128i *>(array);
  const __m128i *const stop =
      reinterpret_cast<const __m128i *>(array + kArrayLen);

  __m128i acc = _mm_setzero_si128();
  while (cursor < stop) {
    acc = _mm_add_epi64(acc, _mm_load_si128(cursor));
    ++cursor;  // advances by one 128-bit vector == two uint64_t elements
  }
  return _mm_extract_epi64(acc, 0);
}
/*
 * Same traversal and checksum as read(), but every 128-bit load is issued
 * with _mm_stream_load_si128 (the non-temporal load intrinsic).
 *
 * Returns lane 0 of the accumulator, i.e. the (wrapping) sum of the elements
 * at even indices.
 *
 * `array` must be 16-byte aligned and hold kArrayLen elements.
 */
uint64_t read_nt(uint64_t *array) {
  // Kept non-const: some compiler headers declare the intrinsic's parameter
  // as a plain `__m128i *`.
  __m128i *cursor = reinterpret_cast<__m128i *>(array);
  __m128i *const stop = reinterpret_cast<__m128i *>(array + kArrayLen);

  __m128i acc = _mm_setzero_si128();
  while (cursor < stop) {
    acc = _mm_add_epi64(acc, _mm_stream_load_si128(cursor));
    ++cursor;  // advances by one 128-bit vector == two uint64_t elements
  }
  return _mm_extract_epi64(acc, 0);
}
int main() {
uint64_t *array = prepare_array();
uint64_t dummy = read(array);
TIME_BODY("read:cache", { LOG_IF(FATAL, read_nt(array) != dummy); });
TIME_BODY("read_nt:cache", { LOG_IF(FATAL, read_nt(array) != dummy); });
clwb_array(array);
TIME_BODY("read:clwb", { LOG_IF(FATAL, read(array) != dummy); });
clwb_array(array);
TIME_BODY("read_nt:clwb", { LOG_IF(FATAL, read_nt(array) != dummy); });
clflushopt_array(array);
TIME_BODY("read:clflushopt", { LOG_IF(FATAL, read(array) != dummy); });
clflushopt_array(array);
TIME_BODY("read_nt:clflushopt", { LOG_IF(FATAL, read_nt(array) != dummy); });
clflush_array(array);
TIME_BODY("read:clflush", { LOG_IF(FATAL, read(array) != dummy); });
clflush_array(array);
TIME_BODY("read_nt:clflush", { LOG_IF(FATAL, read_nt(array) != dummy); });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment