Last active
November 10, 2017 22:52
-
-
Save bbkane/d86276178f339ee79af1577ba19563d8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <cassert> | |
#include <chrono> | |
#include <iostream> | |
#include <vector> | |
#include <random> | |
// NOTE: All iterator concepts are experimental. I'm not going | |
// to concentrate on them | |
// See: http://en.cppreference.com/w/cpp/concept | |
using data_type = int; | |
// -- Helper I/O functions | |
template <typename ForwardIterator> | |
void print_container(ForwardIterator begin_it, ForwardIterator end_it) | |
{ | |
while (begin_it != end_it) | |
{ | |
std::cout << *begin_it << " "; | |
++begin_it; | |
} | |
std::cout << std::endl; | |
} | |
// println: like Python's print with sep=" " and end="\n" | |
// useful for debugging bad sort functions | |
template <typename Head> | |
void println(const Head& head) | |
{ | |
std::cout << head << " " << "\n"; | |
} | |
template <typename Head, typename ...Tail> | |
void println(const Head& head, const Tail&... tail) | |
{ | |
std::cout << head << " "; | |
println(tail...); | |
} | |
// -- Sorts | |
// TODO: don't break this on empty vectors | |
template <typename ForwardIt> | |
void bubblesort(ForwardIt begin_it, ForwardIt end_it) | |
{ | |
auto did_swap = true; | |
while (did_swap) | |
{ | |
did_swap = false; | |
for(auto i = begin_it; i + 1 != end_it; ++i) | |
{ | |
if (*i > *(i + 1)) | |
{ | |
std::iter_swap(i, i + 1); | |
did_swap = true; | |
} | |
} | |
} | |
} | |
// I don't know the proper name for this, but the idea is to search to the end | |
// to find the max element, swap the max element with the end element, shrink | |
// the vector by one and repeat | |
template <typename BidirectionalIt> | |
void minskersort(BidirectionalIt begin_it, BidirectionalIt end_it) | |
{ | |
// TODO: don't make copies of the args | |
auto last_it = end_it - 1; | |
// NOTE: this is basically a pointer, right? So swapping the underlying | |
// value shouldn't change the value of the iterator I think | |
auto first_it = begin_it; | |
while (last_it != first_it) | |
{ | |
// init the max to the first value | |
auto max_it = first_it; | |
// TODO: off by 1 error? | |
for(auto i = first_it; i != last_it + 1; ++i) | |
{ | |
if (*i > *max_it) | |
{ | |
max_it = i; | |
} | |
} | |
std::iter_swap(max_it, last_it); | |
--last_it; | |
} | |
} | |
template <typename RandomAccessIt> | |
void quicksort(RandomAccessIt begin_it, RandomAccessIt end_it) | |
{ | |
} | |
// -- Main Code | |
template<typename SortFunc> | |
void benchmark(const char* name, SortFunc sort_func, std::vector<data_type> v) | |
{ | |
using nanoseconds_t = std::chrono::duration < int, std::nano >; | |
std::cout << "---" << std::endl; | |
std::cout << name << std::endl; | |
// print_container(v.begin(), v.end()); | |
//start time | |
auto start = std::chrono::high_resolution_clock::now(); | |
sort_func(v.begin(), v.end()); | |
auto end = std::chrono::high_resolution_clock::now(); | |
auto duration = std::chrono::duration_cast<nanoseconds_t>(end - start); | |
// check it | |
auto tmp = v; | |
std::sort(tmp.begin(), tmp.end()); | |
assert(v == tmp); | |
// print_container(v.begin(), v.end()); | |
println(duration.count(), "ns"); | |
} | |
std::vector<int> make_random_vector(size_t size) | |
{ | |
std::mt19937 rng; | |
rng.seed(std::random_device()()); | |
// std::uniform_int_distribution<decltype(rng)::result_type> dist(0, size -1); | |
// TODO: use a size_t distribution? | |
std::uniform_int_distribution<int> dist(0, static_cast<int>(size - 1)); | |
std::vector<int> v; | |
v.reserve(size); | |
for (size_t i = 0; i < size; ++i) | |
{ | |
v.push_back(dist(rng)); | |
} | |
return v; | |
} | |
int main() | |
{ | |
auto v = make_random_vector(40); | |
// https://stackoverflow.com/a/47209691/2958070 | |
using iterator_type = decltype(v)::iterator; | |
benchmark("bubblesort", bubblesort<iterator_type>, v); | |
// // benchmark("quicksort", quicksort, v); | |
// // benchmark("heapsort", heapsort, v); | |
// // benchmark("mergesort", mergesort, v); | |
// // benchmark("insertionsort", insertionsort, v); | |
benchmark("minskersort", minskersort<iterator_type>, v); | |
} |
Just noticed that the attributes I added to silence the unused variable warning on GCC caused it not to compile with Clang or Visual Studio; here's a version that should work with all three compilers:
https://wandbox.org/permlink/b8fTKvDNGxLXHYHM
https://gist.github.com/rayhamel/418214600180438622a516e6c8f468bf
@rayhamel thanks- I got a quick look at your code but I won't really be able to dig in until this weekend. It's not a school assignment, just a personal project, and I'm interested in the whole thing. In fact, the benchmarking code might be more practically beneficial than the sorting code :) . Once again, thanks! I'll probably add some comments there if I have questions
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
First of all, I'm glad you liked my answer on SO, and took my advice about randomizing the buffer comments.
As for things to look at in this code, a 40-element buffer is several orders of magnitude too small to obtain a meaningful benchmark, even at the nanosecond scale; the sorting operation will be effectively instantaneous and you're effectively just measuring how fast your PC can access its location in memory.
A decent size to start with is
0x80000 / sizeof(decltype(v)::value_type)
, which is 512 KB, or 131,072 32-bitint
's. This is large enough to get a meaningful measurement, and it's also the typical size of an L2 cache line, so it's the largest number that's still small enough you can be reasonably sure you're benchmarking the algorithm and not memory latency.For benchmarking purposes, I prefer the memory buffer being sorted be allocated on the stack or statically, and the same buffer reused between benchmarks, so that variation due to cache effects are minimized.
To verify a collection is sorted you can just use
std::is_sorted()
.I wrote up how I would do your benchmarking code here:
https://wandbox.org/permlink/OY5w1Dd08yH9CRKY (scroll down to see the output)
Gist link of the same.
I tried to write a clean and extensible design that minimizes external sources of variation between benchmarks, ensures the correctness of the sorting algorithms, and employs several redundant methods to prevent the compiler from optimizing away any part of the sorting operation.
Notice that
std::sort
sorts the 131-thousand-element array in 6 million nanoseconds, or 6 milliseconds! Computers are really fast nowadays.Please feel free to borrow my code—I assume your assignment is more about writing the sorting algorithms than the benchmarking boilerplate. :)