Skip to content

Instantly share code, notes, and snippets.

@dhruvbird
Last active August 29, 2015 14:15
Show Gist options
  • Save dhruvbird/ef0a5c2b1a8398a5d2bb to your computer and use it in GitHub Desktop.
Save dhruvbird/ef0a5c2b1a8398a5d2bb to your computer and use it in GitHub Desktop.
All the code to compare regular binary search with 2-level binary search
# Columns:
# log(N) N Time/Iter(usec)[std::lower_bound] N Time/Iter(usec)[l2search]
23 8388608 0.460176 8388608 0.269376
24 16777216 0.621821 16777216 0.435599
25 33554432 0.735565 33554432 0.615605
26 67108864 0.856156 67108864 0.598811
27 134217728 0.959821 134217728 0.636344
28 268435456 1.552199 268435456 0.771024
29 536870912 1.158228 536870912 0.818436
30 1073741824 1.936374 1073741824 0.905859
#include <iostream>
#include <algorithm>
#include <vector>
#include <sys/time.h>
#include <math.h>
#include <memory>
#include <assert.h>
#include <string.h>
#include <stdio.h>
using namespace std;
template <typename T>
struct Search2LevelArray {
T *start, *end;
size_t N;
size_t sqrtN;
std::unique_ptr<T[]> lookup;
Search2LevelArray(T *_s, T *_e)
: start(_s), end(_e), N(_e - _s), sqrtN(ceil(sqrt(N))) {
this->preprocess();
}
void preprocess() {
std::sort(start, end);
if (N > 16 * 1024) {
lookup.reset(new T[sqrtN - 1]);
for (size_t i = 0; i < sqrtN-1; i++) {
size_t idx = (i + 1) * sqrtN - 1;
if (idx >= N) {
idx = N - 1;
}
lookup[i] = start[idx];
}
}
}
T* lower_bound(T const &x) {
if (N <= 16 * 1024) {
return std::lower_bound(start, start+N, x);
}
auto it = std::equal_range(lookup.get(), lookup.get() + sqrtN - 1, x);
size_t b = (it.first - lookup.get()) * sqrtN;
size_t e = (it.second + 1 - lookup.get()) * sqrtN;
if (b >= N) return start+N;
if (e > N) { e = N; }
return std::lower_bound(start+b, start+e, x);
}
};
int main(int argc, char *argv[]) {
int reps = atoi(argv[1]);
int N = atoi(argv[2]);
int nQueries = atoi(argv[3]);
vector<int> v;
v.reserve(N);
for (int i = 0; i < N; ++i) {
v.push_back(i + 1023);
}
vector<int> queries;
queries.reserve(nQueries);
int mod = N;
// mod = sqrt(N);
for (int i = 0; i < nQueries; ++i) {
queries.push_back(rand() % mod);
}
// std::sort(v.begin(), v.end());
Search2LevelArray<int> l2search(&(v.front()), &(v.back()) + 1);
size_t z = 0;
struct timeval start, end;
if (argc > 4 && !strcmp(argv[4], "l2search")) {
gettimeofday(&start, NULL);
for (int i = 0; i < reps; ++i) {
for (int j = 0; j < nQueries; ++j) {
auto one = l2search.lower_bound(queries[j]) - &(v.front());
z += one;
}
}
} else {
gettimeofday(&start, NULL);
for (int i = 0; i < reps; ++i) {
for (int j = 0; j < nQueries; ++j) {
z += std::lower_bound(v.begin(), v.end(), queries[j]) - v.begin();
}
}
}
gettimeofday(&end, NULL);
double startUsec = start.tv_sec * 1000000 + start.tv_usec;
double endUsec = end.tv_sec * 1000000 + end.tv_usec;
std::cerr << z << std::endl;
std::cerr << "Time taken: " << (endUsec - startUsec) << " u-second" << endl;
std::cerr << "Time per iteration: " << (endUsec - startUsec) / (nQueries * reps) << " usec" << endl;
printf("%d\t%d\t%f\n", static_cast<int>(ceil(log(N) / log(2.0))), N,
(endUsec - startUsec) / (nQueries * reps));
}
# You can execute this script online along with the data in
# data.txt by visiting http://gnuplot.respawned.com/
# Scale font and line width (dpi) by changing the size! It will always display stretched.
set terminal svg size 400,300 enhanced fname 'arial' fsize 10 butt solid
set output 'out.svg'
# Key means label...
set key inside bottom right
set xlabel 'log(N)'
set ylabel 'Time/Iter (usec)'
set title 'Time to search for a key in a static sorted array of size N'
plot "data.txt" using 1:5 title 'l2Search' with linespoints, \
"data.txt" using 1:3 title 'std::lower\_bound' with linespoints
#!/bin/bash
# Run as:
# ./run_lb_test.sh # to run using std::lower_bount
# ./run_lb_test.sh l2search # to run using the 2-level search algorithm
set -e
set -u
REPS=10
NQUERIES=100000
N=$((8 * 1024 * 1024))
g++ lb_test.cpp -O3 -lm -o lb_test -std=c++0x
while [ $N -lt 2000000000 ]
do
./lb_test $REPS $N $NQUERIES "$@"
N=$((N*2))
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment