Skip to content

Instantly share code, notes, and snippets.

@bartoszkp
Last active December 11, 2020 22:23
Show Gist options
  • Save bartoszkp/64d47743ec79de7e88a2e4d0fa6ce24f to your computer and use it in GitHub Desktop.
Save bartoszkp/64d47743ec79de7e88a2e4d0fa6ce24f to your computer and use it in GitHub Desktop.
// Intel(R) Core(TM) i7-2600 CPU @ 3.40 GHz 3.40 GHz
// 16 GB RAM, Windows 7, 64 bit
//
// cl 19
// /GS /GL /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /Zc:inline /fp:precise /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /WX- /Zc:forScope /Gd /Oi /MD /EHsc /nologo /Ot
//
// 1000 random vectors with 1 000 000 elements each.
// 11 tests: with 0%, 10%, 20%, ..., 90%, 100% duplicates in vectors.
// Ratio: 0
// set_copy_if : Time : 618.162 ms +- 18.7261 ms
// set_remove_if : Time : 650.453 ms +- 10.0107 ms
// sort : Time : 212.366 ms +- 5.27977 ms
// Ratio : 0.1
// set_copy_if : Time : 34.1907 ms +- 1.51335 ms
// set_remove_if : Time : 24.2709 ms +- 0.517165 ms
// sort : Time : 43.735 ms +- 1.44966 ms
// Ratio : 0.2
// set_copy_if : Time : 29.5399 ms +- 1.32403 ms
// set_remove_if : Time : 20.4138 ms +- 0.759438 ms
// sort : Time : 36.4204 ms +- 1.60568 ms
// Ratio : 0.3
// set_copy_if : Time : 32.0227 ms +- 1.25661 ms
// set_remove_if : Time : 22.3386 ms +- 0.950855 ms
// sort : Time : 38.1551 ms +- 1.12852 ms
// Ratio : 0.4
// set_copy_if : Time : 30.2714 ms +- 1.28494 ms
// set_remove_if : Time : 20.8338 ms +- 1.06292 ms
// sort : Time : 35.282 ms +- 2.12884 ms
// Ratio : 0.5
// set_copy_if : Time : 24.3247 ms +- 1.21664 ms
// set_remove_if : Time : 16.1621 ms +- 1.27802 ms
// sort : Time : 27.3166 ms +- 2.12964 ms
// Ratio : 0.6
// set_copy_if : Time : 27.3268 ms +- 1.06058 ms
// set_remove_if : Time : 18.4379 ms +- 1.1438 ms
// sort : Time : 30.6846 ms +- 2.52412 ms
// Ratio : 0.7
// set_copy_if : Time : 30.3871 ms +- 0.887492 ms
// set_remove_if : Time : 20.6315 ms +- 0.899802 ms
// sort : Time : 33.7643 ms +- 2.2336 ms
// Ratio : 0.8
// set_copy_if : Time : 33.3077 ms +- 0.746272 ms
// set_remove_if : Time : 22.9459 ms +- 0.921515 ms
// sort : Time : 37.119 ms +- 2.20924 ms
// Ratio : 0.9
// set_copy_if : Time : 36.0888 ms +- 0.763978 ms
// set_remove_if : Time : 24.7002 ms +- 0.465711 ms
// sort : Time : 40.8233 ms +- 2.59826 ms
// Ratio : 1
// set_copy_if : Time : 21.5609 ms +- 1.48986 ms
// set_remove_if : Time : 14.2934 ms +- 0.535431 ms
// sort : Time : 24.2485 ms +- 0.710269 ms
// Context: http://stackoverflow.com/a/37210171/2642204
#include <algorithm>
#include <chrono>
#include <cmath>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <vector>
using namespace std;
using namespace chrono;
unique_ptr<vector<int>> generateRandomVector(int elementCount, double duplicatesRatio)
{
static random_device r;
static uniform_int_distribution<int> uniform_dist(-1000000, 1000000);
static default_random_engine randomEngine(r());
auto result = make_unique<vector<int>>(elementCount);
result->reserve(elementCount);
auto duplicateCount = elementCount * duplicatesRatio;
for (int i = 0; i < elementCount; ++i)
{
auto value = uniform_dist(randomEngine);
result->push_back(value);
for (int j = 0; j < duplicateCount; ++j)
{
result->push_back(value);
++i;
}
}
std::random_shuffle(result->begin(), result->end());
return result;
}
tuple<double, double> timeFunction(function<void(const vector<int>&, vector<int>&)>& experiment, const vector<unique_ptr<vector<int>>>& data)
{
auto count = data.size();
vector<double> measurements;
measurements.reserve(count);
for (auto& input : data)
{
vector<int> output(*input.get());
auto start = high_resolution_clock::now();
experiment(*input.get(), output);
auto end = high_resolution_clock::now();
auto d = duration<double, milli>(end - start).count();
measurements.push_back(d);
}
double average
= accumulate(measurements.begin(), measurements.end(), 0.0)
/ count;
transform(
measurements.begin(),
measurements.end(),
measurements.begin(),
[&](double e) { return (e - average) * (e - average); });
auto variance
= accumulate(measurements.begin(), measurements.end(), 0.0)
/ count;
return tuple<double, double>(average, sqrt(variance));
}
void uniquifyWithOrder_set_copy_if(const vector<int>& input, vector<int>& output)
{
struct NotADuplicate
{
bool operator()(const int& element)
{
return _s.insert(element).second;
}
private:
set<int> _s;
};
vector<int> uniqueNumbers;
NotADuplicate pred;
output.clear();
output.reserve(input.size());
copy_if(
input.begin(),
input.end(),
back_inserter(output),
ref(pred));
}
void uniquifyWithOrder_set_remove_if(const vector<int>& input, vector<int>& output)
{
set<int> seen;
auto newEnd = remove_if(output.begin(), output.end(), [&seen](const int& value)
{
if (seen.find(value) != end(seen))
return true;
seen.insert(value);
return false;
});
output.erase(newEnd, output.end());
}
void uniquifyWithOrder_sort(const vector<int>&, vector<int>& output)
{
using It = vector<int>::iterator;
struct target_less
{
bool operator()(It const &a, It const &b) const { return *a < *b; }
};
struct target_equal
{
bool operator()(It const &a, It const &b) const { return *a == *b; }
};
auto begin = output.begin();
auto const end = output.end();
{
vector<It> v;
v.reserve(static_cast<size_t>(distance(begin, end)));
for (auto i = begin; i != end; ++i)
{
v.push_back(i);
}
sort(v.begin(), v.end(), target_less());
v.erase(unique(v.begin(), v.end(), target_equal()), v.end());
sort(v.begin(), v.end());
size_t j = 0;
for (auto i = begin; i != end && j != v.size(); ++i)
{
if (i == v[j])
{
using std::iter_swap; iter_swap(i, begin);
++j;
++begin;
}
}
}
output.erase(begin, output.end());
}
void printResults(
const string& name,
const tuple<double, double>& result)
{
cout << name
<< ": "
<< "Time: "
<< get<0>(result)
<< " ms"
<< " +- "
<< get<1>(result)
<< " ms"
<< endl;
}
void runTestForDuplicateRatio(double duplicateRatio)
{
vector<unique_ptr<vector<int>>> data;
for (int i = 0; i < 1000; ++i)
{
data.push_back(generateRandomVector(1000000, duplicateRatio));
}
typedef function<void(const vector<int>&, vector<int>&)> Experiment;
Experiment set_copy_if = [&](const vector<int>& i, vector<int>& o)
{ uniquifyWithOrder_set_copy_if(i, o); };
Experiment set_remove_if = [&](const vector<int>& i, vector<int>& o)
{ uniquifyWithOrder_set_remove_if(i, o); };
Experiment sort = [&](const vector<int>& i, vector<int>& o)
{ uniquifyWithOrder_sort(i, o); };
auto resultSetCopyIf = timeFunction(set_copy_if, data);
auto resultSetRemoveIf = timeFunction(set_remove_if, data);
auto resultSort = timeFunction(sort, data);
printResults("set_copy_if", resultSetCopyIf);
printResults("set_remove_if", resultSetRemoveIf);
printResults("sort", resultSort);
}
int main(int argc, char** argv)
{
vector<double> ratios{ 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 };
for (auto ratio : ratios)
{
cout << "Ratio: " << ratio << endl;
runTestForDuplicateRatio(ratio);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment