Last active
December 31, 2024 19:57
-
-
Save izanbf1803/7935d08a6434c7b3aad774ef563a6ee1 to your computer and use it in GitHub Desktop.
Test whether using hashing in O(n) algorithm is faster than sorting. Arised from a Twitter discussion about Big O notation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <iomanip> | |
#include <vector> | |
#include <unordered_set> | |
#include <algorithm> | |
#include <stdlib.h> // qsort | |
#include <chrono> | |
#include <cassert> | |
using std::chrono::high_resolution_clock; | |
using std::chrono::duration_cast; | |
using std::chrono::duration; | |
using std::chrono::milliseconds; | |
int solution_hashtable(const std::vector<int>& arr) { // assume arr.size() >= 1 | |
std::unordered_set<int> htable; | |
htable.reserve(1.5 * arr.size()); | |
for (int x : arr) { | |
htable.insert(x); | |
} | |
int best = 1; | |
for (int x : arr) { | |
if (htable.find(x-1) == htable.end()) { | |
int count = 1; | |
while (htable.find(x+1) != htable.end()) { | |
++count; | |
++x; | |
} | |
best = std::max(best, count); | |
} | |
} | |
return best; | |
} | |
// from https://cplusplus.com/reference/cstdlib/qsort/ | |
int compare (const void * a, const void * b) { | |
return ( *(int*)a - *(int*)b ); | |
} | |
int solution_qsort(std::vector<int> arr) { // assume arr.size() >= 1 | |
int n = arr.size(); | |
qsort(arr.data(), n, sizeof(int), compare); | |
int best = 1; | |
int curr = 1; | |
for (int i = 1; i < n; ++i) { | |
if (arr[i] == arr[i-1] + 1) { | |
++curr; | |
best = std::max(best, curr); | |
} | |
else if (arr[i] != arr[i-1]) { | |
curr = 1; | |
} | |
} | |
return best; | |
} | |
int solution_stdsort(std::vector<int> arr) { // assume arr.size() >= 1 | |
int n = arr.size(); | |
std::sort(arr.begin(), arr.end()); | |
int best = 1; | |
int curr = 1; | |
for (int i = 1; i < n; ++i) { | |
if (arr[i] == arr[i-1] + 1) { | |
++curr; | |
best = std::max(best, curr); | |
} | |
else if (arr[i] != arr[i-1]) { | |
curr = 1; | |
} | |
} | |
return best; | |
} | |
int main() { | |
srand(time(0)); | |
int RUNS = 100; | |
int N = 1e5; | |
double hashtable_avg = 0; | |
double qsort_avg = 0; | |
double stdsort_avg = 0; | |
for (int run = 0; run < RUNS; ++run) { | |
std::vector<int> testcase(N); | |
for (int i = 0; i < N; ++i) { | |
testcase[i] = rand() % N; | |
} | |
auto t1 = high_resolution_clock::now(); | |
int hashtable_sol = solution_hashtable(testcase); | |
auto t2 = high_resolution_clock::now(); | |
auto hashtable_time = duration_cast<std::chrono::microseconds>(t2 - t1); | |
t1 = high_resolution_clock::now(); | |
int qsort_sol = solution_qsort(testcase); | |
t2 = high_resolution_clock::now(); | |
auto qsort_time = duration_cast<std::chrono::microseconds>(t2 - t1); | |
t1 = high_resolution_clock::now(); | |
int stdsort_sol = solution_stdsort(testcase); | |
t2 = high_resolution_clock::now(); | |
auto stdsort_time = duration_cast<std::chrono::microseconds>(t2 - t1); | |
assert(hashtable_sol == qsort_sol); | |
assert(qsort_sol == stdsort_sol); | |
hashtable_avg += hashtable_time.count(); | |
qsort_avg += qsort_time.count(); | |
stdsort_avg += stdsort_time.count(); | |
} | |
hashtable_avg /= RUNS; | |
qsort_avg /= RUNS; | |
stdsort_avg /= RUNS; | |
std::cout << std::fixed; | |
std::cout << std::setprecision(2); | |
std::cout << "ARRAY SIZES: " << N << std::endl; | |
std::cout << "RUNS: " << RUNS << std::endl; | |
std::cout << "HASH TABLE avg." << std::setw(12) << hashtable_avg << "μs" << std::endl; | |
std::cout << "QSORT avg." << std::setw(12) << qsort_avg << "μs" << std::endl; | |
std::cout << "STD::SORT avg." << std::setw(12) << stdsort_avg << "μs" << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment