Created
May 13, 2016 21:05
-
-
Save eskil/fcd6890403d3b9b707ecd15be17c050f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | |
// | |
// (C) Eskil Heyn Olsen 2009 | |
// | |
#include <iostream> | |
#include <vector> | |
#include <utility> | |
#include <iterator> | |
#include <algorithm> | |
//#define DEBUG | |
namespace { | |
template<typename Iter, typename PredT> | |
struct find_upper_edge_split { | |
private: | |
typedef std::pair<size_t, size_t> range_t; | |
public: | |
find_upper_edge_split (std::vector<range_t> &ranges, Iter from, Iter to, ptrdiff_t &good, ptrdiff_t &bad, PredT pred) | |
: m_ranges (ranges), m_from (from), m_to (to), m_good (good), m_bad (bad), m_pred (pred) | |
{ } | |
// Not const, since otherwise m_pred would have to be const | |
void operator() (range_t range) { | |
ptrdiff_t count = range.second - range.first; | |
ptrdiff_t step = count/2; | |
ptrdiff_t pos = range.first + step; | |
Iter it = m_from; | |
std::advance (it, pos); | |
// Optimisation - if we've encountered an earlier bad entry, | |
// there's no reason to check later ranges. This happens when an inaccessible entry | |
// causes a split, and when checking the two new ranges, the first one has a bad entry. | |
// No reason to check the second. | |
if (m_bad < pos) { | |
return; | |
} | |
#ifdef DEBUG | |
std::cout << "\tchecking " << pos << "\n"; | |
#endif /* DEBUG */ | |
int result = m_pred (*it); | |
if (result == 0) { | |
m_good = std::max (m_good, std::distance (m_from, it)); | |
range.first = pos; | |
#ifdef DEBUG | |
std::cout << "\tchecking " << pos << " is good, new range = (" << range.first << "," << range.second << ")\n"; | |
#endif /* DEBUG */ | |
m_ranges.push_back (range); | |
} else if (result > 0) { | |
m_bad = std::min (m_bad, std::distance (m_from, it)); | |
range.second = pos; | |
#ifdef DEBUG | |
std::cout << "\tchecking " << pos << " is bad, new range = (" << range.first << "," << range.second << ")\n"; | |
#endif /* DEBUG */ | |
m_ranges.push_back (range); | |
} else { | |
range_t a (range.first, range.first + step ); | |
range_t b (range.first + step, range.second); | |
#ifdef DEBUG | |
std::cout << "\tchecking " << pos << " is inaccessible, new ranges = (" << a.first << "," << a.second << ") and (" << b.first << "," << b.second << ")\n"; | |
#endif /* DEBUG */ | |
// Optimisation, skip inserting ranges of size 1 | |
if (a.second - a.first > 1) { m_ranges.push_back (a); } | |
if (b.second - b.first > 1) { m_ranges.push_back (b); } | |
} | |
} | |
private: | |
std::vector<range_t> &m_ranges; | |
Iter m_from, m_to; | |
ptrdiff_t &m_good, &m_bad; | |
PredT m_pred; | |
}; | |
template<typename T> | |
void print_range (T t) { | |
std::cout << "(" << t.first << "," << t.second << ")"; | |
} | |
template<typename T> | |
bool is_distance_1 (T range) { | |
return (range.second - range.first) == 1; | |
} | |
} | |
// | |
// find_upper_edge | |
// | |
// PredT is a predicate, returns int, three states, 0 = good >0 = bad <0 = inaccessible | |
// | |
// Algorithm is roughly a binary search but doing divide and conquer | |
// when an element cannot be checked (inaccesible). E.g. the case of | |
// applying binary search to source repository to find a regression | |
// can be thrown off course by hitting unbuildable/untestable | |
// revisions - aka the predicate cannot be tested. Likewise, linear | |
// search is undesirable since build/test time may be huge. | |
// | |
// 1: Given a range of size m (m > 1) (1...m), | |
// 2: Set State to contain range (1...m) | |
// | |
// 3: for each range R (k...l) in state | |
// 4: check pos p = m/2, if good, remember store p in A, replace R with (p...l) (bin search right) | |
// 5: if bad, remember store p in B, replace R with (k...p) (bin search left) | |
// 6: if not accessible, replace R with two ranges (k...p) and (p...l) (divide & conquer) | |
// 7: remove all ranges (e...f) for which f-e == 1 (optional optimisation is to not insert them in step 6) | |
// 8: if state is nonempty, repeat from 3. | |
// | |
// 9: Result is range (A...B) | |
// | |
// Open questions: | |
// | |
// Q: On step 4 and 5, should A = std::max (A, p) and B = std::min (B, p) ? | |
// | |
// Q: Can we optimise in step 5 by removing ranges in states to the | |
// right of p, once we now know that everything to the right of p | |
// is not the edge ? | |
// | |
// Q: ditto in 4, but that opens the question ; are we finding the | |
// last or first edge ? For the build issue, we care about the | |
// last. In which case, setting A and B gets tricky... | |
// | |
// Q: In step 3, would it be beneficial (on the average case) to go | |
// through ranges in a different order than linear ? | |
// | |
// | |
template<typename Iter, typename PredT> | |
std::pair<Iter, Iter> find_upper_edge (Iter from, Iter to, PredT pred) { | |
size_t iterations = 0; | |
size_t compares = 0; | |
typedef std::pair<size_t, size_t> range_t; | |
std::vector<range_t> ranges; | |
ptrdiff_t good = 0; | |
ptrdiff_t bad = std::distance (from, to); | |
ranges.push_back (std::make_pair (good, bad)); | |
#ifdef DEBUG | |
std::cout << "Start: "; | |
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>); | |
std::cout << "\n"; | |
#endif /* DEBUG */ | |
while (!ranges.empty ()) { | |
compares += ranges.size (); | |
// Sweep | |
std::vector<range_t> new_ranges; | |
new_ranges.reserve (ranges.size ()); | |
std::for_each (ranges.begin (), ranges.end (), | |
find_upper_edge_split<Iter, PredT> (new_ranges, from, to, good, bad, pred)); | |
ranges = new_ranges; | |
++iterations; | |
#ifdef DEBUG | |
std::cout << "\tNew Ranges: "; | |
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>); | |
std::cout << "\n"; | |
#endif /* DEBUG */ | |
ranges.erase (std::remove_if (ranges.begin (), ranges.end (), is_distance_1<range_t>), ranges.end ()); | |
#ifdef DEBUG | |
std::cout << "\tTrimmed Ranges: "; | |
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>); | |
std::cout << "\n\tGood = " << good << " bad = " << bad << " iterations = " << iterations << "\n"; | |
std::cout << "\n"; | |
#endif /* DEBUG */ | |
} | |
//#ifdef DEBUG | |
std::cout << "Done, searched " << std::distance (from, to) << " elements " | |
<< " in " << iterations << " iterations " | |
<< "with " << compares << " compares\n"; | |
//#endif /* DEBUG */ | |
return std::make_pair (from + good, from + bad); | |
} | |
int predicate (int v) { | |
if (v == 0) { | |
return 1; | |
} else if (v == 1) { | |
return 0; | |
} | |
return -1; | |
} | |
void test_find_upper_edge (int *range, size_t a, size_t b) { | |
typedef std::pair<size_t, size_t> range_t; | |
range_t expect (a, b); | |
size_t sz = 0; | |
for (size_t i = 0; range[i] != 9; ++i) { sz = i; } | |
std::pair<int*, int*> result = find_upper_edge (&range[0], &range[sz], predicate); | |
range_t got (std::distance (&range[0], result.first), std::distance (&range[0], result.second)); | |
if (got == expect) { | |
std::cout << "*** Got (" << got.first << "," << got.second << "), ok\n"; | |
} else { | |
std::cout << "*** Got (" << got.first << "," << got.second << ") " | |
<< "expected (" << expect.first << "," << expect.second << ")\n"; | |
assert (got == expect); | |
} | |
} | |
int main (int argc, char *argv[]) { | |
// 0 is bad | |
// 1 is good | |
// 2 is inaccessible | |
// 9 is end | |
int a[] = { 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (a, 3, 8); | |
int b0[] = { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (b0, 6, 7); | |
int b1[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (b1, 7, 8); | |
int b2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (b2, 8, 9); | |
int b3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (b3, 9, 10); | |
int c[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (c, 7, 8); | |
int d0[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (d0, 14, 15); | |
int d1[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (d1, 15, 16); | |
int d2[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (d2, 16, 17); | |
int e[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (e, 14, 20); | |
int f[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (f, 6, 14); | |
// 0 5 10 15 20 25 30 32 | |
int g[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (g, 20, 21); | |
// 0 5 10 15 20 25 30 32 | |
int h[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (h, 14, 23); | |
// 0 5 10 15 20 25 30 32 | |
int i[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (i, 24, 26); | |
// 0 5 10 15 20 25 30 32 | |
int i2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (i2, 14, 32); | |
// 0 5 10 15 20 25 30 32 | |
int i3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (i3, 14, 28); | |
// 0 5 10 15 20 25 30 32 | |
int i4[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (i4, 14, 21); | |
// 0 5 10 15 20 25 30 32 | |
int i5[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (i5, 21, 28); | |
// 0 5 10 15 20 25 30 32 | |
int j[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (j, 14, 15); | |
// This is an invalid case, since the range is already unordered | |
// It however happens to work if at least the last element is bad, | |
// in which case it'll find the last edge | |
// 0 5 10 15 20 25 30 32 | |
int k0[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (k0, 27, 28); | |
int k1[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 9 }; | |
test_find_upper_edge (k1, 31, 32); | |
// Last element is good, fail | |
//int k2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9 }; | |
//test_find_upper_edge (k2, 21, 22); | |
// Last element it inaccessible, fail | |
//int k3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 9 }; | |
//test_find_upper_edge (k3, 21, 22); | |
// 0 5 10 15 20 25 30 32 | |
int l[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (l, 0, 32); | |
// 0 5 10 15 20 25 30 32 | |
int m[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (m, 10, 32); | |
// 0 5 10 15 20 25 30 32 | |
int n[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (n, 0, 24); | |
// 0 5 10 15 20 25 30 32 | |
int o[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 9 }; | |
test_find_upper_edge (o, 9, 24); | |
// Corner case to test short circuit when early bad build is found, but inaccessible builds left behind some splits | |
// 0 5 10 15 20 25 30 32 | |
int p[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 9 }; | |
test_find_upper_edge (p, 11, 12); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment