Created
October 23, 2014 00:13
-
-
Save preshing/4d28abad8da4e40cb1d4 to your computer and use it in GitHub Desktop.
Generate benchmarks for three implementations of a wait-free queue using C++11 atomics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Generate benchmarks for the three queue implementations shown in | |
// Jeff Preshing's CppCon 2014 talk, "How Ubisoft Montreal Develops Games for | |
// Multicore - Before and After C++11". | |
// | |
// Slides: https://github.com/CppCon/CppCon2014/blob/master/Presentations/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore%20-%20Before%20and%20After%20C++11%20-%20Jeff%20Preshing%20-%20CppCon%202014.pdf?raw=true | |
// | |
#include <iostream> | |
#include <atomic> | |
#include <thread> | |
#include <chrono> | |
// 0 : Low-level (relaxed) C++11 atomics with standalone memory fences | |
// 1 : Low-level C++11 atomics with ordering constraints | |
// 2 : Sequentially consistent C++11 atomics | |
#define METHOD 0 | |
using namespace std; | |
template <class T, int size> | |
class CappedSPSCQueue | |
{ | |
private: | |
T m_items[size]; | |
atomic<int> m_writePos; | |
alignas(64) int m_readPos; | |
public: | |
CappedSPSCQueue() : m_writePos(0), m_readPos(0) {} | |
void clear() | |
{ | |
m_writePos = 0; | |
m_readPos = 0; | |
} | |
#if METHOD == 0 | |
bool tryPush(const T& item) | |
{ | |
int w = m_writePos.load(memory_order_relaxed); | |
if (w >= size) | |
return false; | |
m_items[w] = item; | |
atomic_thread_fence(memory_order_release); | |
m_writePos.store(w + 1, memory_order_relaxed); | |
return true; | |
} | |
bool tryPop(T& item) | |
{ | |
int w = m_writePos.load(memory_order_relaxed); | |
if (m_readPos >= w) | |
return false; | |
atomic_thread_fence(memory_order_acquire); | |
item = m_items[m_readPos]; | |
m_readPos++; | |
return true; | |
} | |
#elif METHOD == 1 | |
bool tryPush(const T& item) | |
{ | |
int w = m_writePos.load(memory_order_relaxed); | |
if (w >= size) | |
return false; | |
m_items[w] = item; | |
m_writePos.store(w + 1, memory_order_release); | |
return true; | |
} | |
bool tryPop(T& item) | |
{ | |
int w = m_writePos.load(memory_order_acquire); | |
if (m_readPos >= w) | |
return false; | |
item = m_items[m_readPos]; | |
m_readPos++; | |
return true; | |
} | |
#elif METHOD == 2 | |
bool tryPush(const T& item) | |
{ | |
int w = m_writePos; | |
if (w >= size) | |
return false; | |
m_items[w] = item; | |
m_writePos = w + 1; | |
return true; | |
} | |
bool tryPop(T& item) | |
{ | |
int w = m_writePos; | |
if (m_readPos >= w) | |
return false; | |
item = m_items[m_readPos]; | |
m_readPos++; | |
return true; | |
} | |
#endif | |
}; | |
class Timer | |
{ | |
private: | |
chrono::high_resolution_clock::time_point t0, t1; | |
double span; | |
public: | |
Timer() | |
{ | |
span = 0; | |
} | |
void begin() | |
{ | |
t0 = chrono::high_resolution_clock::now(); | |
} | |
void end() | |
{ | |
t1 = chrono::high_resolution_clock::now(); | |
span += chrono::duration_cast<chrono::duration<double>>(t1 - t0).count(); | |
} | |
double duration() | |
{ | |
return span; | |
} | |
void clear() | |
{ | |
span = 0; | |
} | |
}; | |
static const int limit = 10000000; | |
CappedSPSCQueue<int, limit> TestQueue; | |
atomic<bool> ready(false); | |
Timer writeTimer, readTimer; | |
void writer() | |
{ | |
ready = true; | |
writeTimer.begin(); | |
for (int i = 0; i < limit; i++) | |
{ | |
TestQueue.tryPush(i); | |
} | |
writeTimer.end(); | |
} | |
void reader() | |
{ | |
static volatile int destination; | |
while (!ready) {} | |
readTimer.begin(); | |
for (int i = 0; i < limit; i++) | |
{ | |
int y; | |
while (!TestQueue.tryPop((int&) y)) {} | |
// Force the result to be stored somewhere, otherwise | |
// the compiler will optimize y away and not bother | |
// copying the item from the queue at all | |
destination = y; | |
} | |
readTimer.end(); | |
} | |
#if METHOD < 2 && !defined(__arm__) | |
static const int iterations = 100; | |
#else | |
static const int iterations = 20; | |
#endif | |
extern "C" | |
void tests() | |
{ | |
printf("METHOD = %d, queue size = %d\n", METHOD, limit); | |
writeTimer.clear(); | |
readTimer.clear(); | |
for (int i = 0; i < iterations; i++) | |
{ | |
TestQueue.clear(); | |
ready = false; | |
writer(); | |
reader(); | |
} | |
printf("separate: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9); | |
writeTimer.clear(); | |
readTimer.clear(); | |
for (int i = 0; i < iterations; i++) | |
{ | |
TestQueue.clear(); | |
ready = false; | |
thread t1(writer); | |
reader(); | |
t1.join(); | |
} | |
printf("together: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9); | |
} | |
int main(int argc, const char * argv[]) | |
{ | |
tests(); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment