// A prototype of Ethash lazy DAG initialization.
// Source: GitHub gist chfast/f87dc8c76340c17140593f33d71f3897, created September 19, 2017 18:28.
#include <array>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <random>
#include <thread>
#include <vector>
/// One table entry: eight 64-bit words, each of which is individually atomic.
///
/// The words are independent atomics, so the entry as a whole is never read or
/// written in a single atomic step. main() uses word 0 as the "initialized"
/// marker: an entry whose first word reads 0 is treated as not yet filled in.
struct item
{
    std::array<std::atomic<std::uint64_t>, 8> a;
};
int main() | |
{ | |
static_assert(sizeof(item) == 8 * sizeof(uint64_t), ""); | |
constexpr size_t t = 64; | |
constexpr size_t cache_size = size_t(3) * 1024 * 1024 * 1024; | |
constexpr size_t n = cache_size / sizeof(item); | |
constexpr size_t k = 2000000000 / t; | |
std::cout << "Cache size: " << cache_size << "\n"; | |
std::cout << "Cache items: " << n << "\n"; | |
std::cout << "Iterations: " << k << "\n"; | |
std::cout << "Threads: " << t << "\n\n"; | |
std::vector<item> dag(n); | |
std::array<std::thread, t> threads; | |
auto start_time = std::chrono::high_resolution_clock::now(); | |
std::atomic<uint64_t> global_sum; | |
for (auto& th: threads) | |
{ | |
th = std::thread{[&dag, &global_sum] | |
{ | |
uint64_t sum = 0; | |
std::mt19937 gen(std::random_device{}()); | |
std::uniform_int_distribution<size_t> dis{0, n - 1}; | |
for (size_t j = 0; j < k; ++j) | |
{ | |
size_t index = dis(gen); | |
item& it = dag[index]; | |
if (it.a[0] == 0) // Cache item not initialized. | |
{ | |
// Init item. Start from the end to set the first atomic | |
// as the last write. | |
for (auto i = it.a.rbegin(); i != it.a.rend(); ++i) | |
*i = index + (7 - (i - it.a.rbegin())); | |
} | |
for (size_t i = 0; i < it.a.size(); ++i) | |
{ | |
// Check if the item is valid. | |
uint64_t a = it.a[i]; | |
if (a != index + i) | |
{ | |
std::cerr << index << " " << i << ": " << a << "\n"; | |
throw 0; | |
} | |
// Do something with the item. | |
sum += a; | |
} | |
} | |
global_sum += sum; | |
}}; | |
} | |
for (auto&& th: threads) | |
th.join(); | |
auto duration = std::chrono::high_resolution_clock::now() - start_time; | |
auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(); | |
auto access_rate = (1000 * t * k) / duration_ms; | |
auto bandwidth = access_rate * sizeof(item); | |
std::cout << "SUM: " << global_sum << "\n"; | |
std::cout << "ACCESS RATE: " << (access_rate / 1000000.0) << " M/s\n"; | |
std::cout << "BANDWIDTH: " << (bandwidth / 1000000000.0) << " GB/s\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment