Last active
December 23, 2015 15:39
-
-
Save wuye9036/6657313 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdafx.h"
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <future>
#include <iostream>
#include <limits>
#include <memory>
#include <thread>
#include <utility>
#include <vector>
#if defined(WIN32) | |
#define NOMINMAX | |
#include <Windows.h> | |
#endif | |
using namespace std; | |
// Number of floats in the benchmark array. Builds that define _DEBUG use a
// smaller array than builds that do not.
#if defined(_DEBUG)
static const int ARRAY_SIZE = 4 * 512 * 512;
#else
static const int ARRAY_SIZE = 4 * 2048 * 2048;
#endif
/// Repeated-timing harness using a "K-best" stopping rule.
///
/// test() calls the supplied function up to max_tests_ times, timing each call
/// in microseconds. It keeps the k_ smallest timings seen so far in a max-heap
/// (so heap_.front() is the k-th smallest) and stops as soon as that k-th
/// smallest timing is within a factor (1 + eps_) of the overall minimum.
struct k_best
{
public:
    /// @param k               Number of best timings that must agree. Values
    ///                        <= 0 are replaced by 1.
    /// @param epsilon         Relative tolerance between the k-th best timing
    ///                        and the minimum. Negative values are replaced
    ///                        by 0.05.
    /// @param max_test_count  Upper bound on the number of timed calls; raised
    ///                        to k when it is smaller than k.
    k_best(int k, float epsilon, int max_test_count)
        : k_(k > 0 ? k : 1)   // also rejects negative k, which would make test() read an empty heap
        , eps_(epsilon < 0.0f ? 0.05f : epsilon)
        , max_tests_(max_test_count < k_ ? k_ : max_test_count)
        , min_time_(std::numeric_limits<int64_t>::max())
        , perf_freq_(0)
    {
#if defined(WIN32)
        LARGE_INTEGER freq;
        QueryPerformanceFrequency(&freq);
        perf_freq_ = freq.QuadPart;
#endif
    }

    /// Times fn repeatedly until the K-best criterion is met or max_tests_
    /// calls have been made. Prints "Convergence: <n> = " (n = number of calls
    /// made) or "Convergence: No = " to std::cout before returning.
    ///
    /// Timing state (heap_ and min_time_) is not reset between calls, so a
    /// second call on the same object continues from the first call's samples.
    ///
    /// @return first:  true when the criterion was met.
    ///         second: the k-th smallest timing in microseconds (or the largest
    ///                 timing kept so far when fewer than k samples exist).
    std::pair<bool, int64_t> test(std::function<void ()> const& fn)
    {
        for(int i = 0; i < max_tests_; ++i)
        {
            int64_t const elapsed_us = time_once(fn);

            min_time_ = std::min(min_time_, elapsed_us);

            // Max-heap limited to k_ entries: pushing then popping the largest
            // leaves the k_ smallest timings.
            heap_.push_back(elapsed_us);
            std::push_heap(heap_.begin(), heap_.end());
            if(heap_.size() > static_cast<std::size_t>(k_))
            {
                std::pop_heap(heap_.begin(), heap_.end());
                heap_.pop_back();
            }

            if(heap_.size() == static_cast<std::size_t>(k_))
            {
                double const kth_best = static_cast<double>(heap_.front());
                double const bound    = static_cast<double>(min_time_) * (1.0 + static_cast<double>(eps_));
                // "<=" rather than "<": when every sample measures 0us the
                // bound is 0 as well, and a strict comparison never converges.
                if(kth_best <= bound)
                {
                    std::cout << "Convergence: " << i+1 << " = " ;
                    return std::make_pair( true, heap_.front() );
                }
            }
        }

        // max_tests_ >= k_ >= 1, so at least one sample is in the heap here.
        std::cout << "Convergence: No = " ;
        return std::make_pair(false, heap_.front());
    }

private:
    /// Runs fn once and returns the elapsed time in microseconds.
    int64_t time_once(std::function<void ()> const& fn) const
    {
#if defined(WIN32)
        LARGE_INTEGER beg_time, end_time;
        QueryPerformanceCounter(&beg_time);
        fn();
        QueryPerformanceCounter(&end_time);
        return static_cast<int64_t>( double(end_time.QuadPart - beg_time.QuadPart) / double(perf_freq_) * 1000000 );
#else
        // steady_clock is monotonic, so the difference cannot go negative
        // if the system clock is adjusted during the measurement.
        std::chrono::steady_clock::time_point const start = std::chrono::steady_clock::now();
        fn();
        std::chrono::steady_clock::time_point const stop = std::chrono::steady_clock::now();
        return std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
#endif
    }

    int k_;                       // number of best samples that must agree (>= 1)
    float eps_;                   // relative tolerance (>= 0)
    int max_tests_;               // maximum number of timed calls (>= k_)
    std::vector<int64_t> heap_;   // max-heap of the k_ smallest timings, in us
    int64_t min_time_;            // smallest timing seen so far, in us
    int64_t perf_freq_;           // QueryPerformanceFrequency result; 0 when WIN32 is not defined
};
struct calc | |
{ | |
calc() | |
{ | |
base = new float[ARRAY_SIZE]; | |
} | |
virtual ~calc() | |
{ | |
delete [] base; | |
} | |
void init() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
base[i] = static_cast<float>(i); | |
} | |
} | |
inline void do_with_fnptr(int i) | |
{ | |
fn(base, i); | |
} | |
inline void do_with_mfnptr(int i) | |
{ | |
(this->*mfn)(i); | |
} | |
inline void do_with_fnobj(int i) | |
{ | |
fnobj(i); | |
} | |
virtual void do_with_vfn(int i) = 0; | |
public: | |
float* base; | |
void (*fn)(float* base, int i); | |
void (calc::*mfn)(int i); | |
std::function<void (int)> fnobj; | |
static void add2(float* base, int i) | |
{ | |
base[i] += 2; | |
} | |
static void mul2(float* base, int i) | |
{ | |
base[i] *= 7.16f; | |
} | |
static void do_nothing(float* /*base*/, int /*i*/) | |
{ | |
} | |
void madd2(int i) | |
{ | |
*(base+i) += 2; | |
} | |
void mmul2(int i) | |
{ | |
*(base+i) *= 7.16f; | |
} | |
}; | |
struct calc_add2: public calc | |
{ | |
calc_add2() | |
{ | |
fn = &calc::add2; | |
mfn = &calc::madd2; | |
fnobj = [this](int i) { this->base[i] += 2; }; | |
} | |
void do_with_vfn(int i) | |
{ | |
base[i] += 2; | |
} | |
}; | |
struct calc_mul2: public calc | |
{ | |
calc_mul2() | |
{ | |
fn = &calc::mul2; | |
mfn = &calc::mmul2; | |
fnobj = [this](int i) { this->base[i] *= 7.16f; }; | |
} | |
void do_with_vfn(int i) | |
{ | |
base[i] *= 7.16f; | |
} | |
}; | |
int _tmain(int argc, _TCHAR* argv[]) | |
{ | |
chrono::duration<chrono::high_resolution_clock::rep, chrono::high_resolution_clock::period> duration; | |
calc* obj = nullptr; | |
if(argc == 1) | |
{ | |
obj = new calc_add2(); | |
} | |
else | |
{ | |
obj = new calc_mul2(); | |
} | |
{ | |
k_best measure(16, 0.03f, 500); | |
obj->init(); | |
auto test_result = measure.test( [=]() | |
{ | |
if(argc == 1) | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->base[i] += 2; | |
} | |
} | |
else | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->base[i] *= 7.16f; | |
} | |
} | |
}); | |
cout << "Branch per batch Elapsed: " << test_result.second << "us" << endl; | |
} | |
{ | |
obj->init(); | |
k_best measure(16, 0.03f, 500); | |
auto test_result = measure.test( [=]() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
if(argc == 1) | |
{ | |
obj->base[i] += 2; | |
} | |
else | |
{ | |
obj->base[i] *= 7.16f; | |
} | |
} | |
}); | |
cout << "Branch per scalar Elapsed: " << test_result.second << "us" << endl; | |
} | |
{ | |
obj->init(); | |
k_best measure(16, 0.03f, 500); | |
auto test_result = measure.test( [=]() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->do_with_fnptr(i); | |
} | |
}); | |
cout << "Fn Ptr Elapsed: " << test_result.second << "us" << endl; | |
} | |
{ | |
obj->init(); | |
k_best measure(16, 0.03f, 500); | |
auto test_result = measure.test( [=]() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->do_with_vfn(i); | |
} | |
}); | |
cout << "Virtual Func Elapsed: " << test_result.second << "us" << endl; | |
} | |
{ | |
obj->init(); | |
k_best measure(16, 0.03f, 500); | |
auto test_result = measure.test( [=]() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->do_with_mfnptr(i); | |
} | |
}); | |
cout << "Member Func Ptr Elapsed: " << test_result.second << "us" << endl; | |
} | |
{ | |
obj->init(); | |
k_best measure(16, 0.03f, 500); | |
auto test_result = measure.test( [=]() | |
{ | |
for(int i = 0; i < ARRAY_SIZE; ++i) | |
{ | |
obj->do_with_fnobj(i); | |
} | |
}); | |
cout << "Function Object with Lambda Elapsed: " << test_result.second << "us" << endl; | |
} | |
delete obj; | |
system( "pause" ); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment