Last active
August 29, 2015 14:15
-
-
Save seckcoder/328d60120425db663051 to your computer and use it in GitHub Desktop.
bug free binary search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* | |
* 最简单的二分查找. 在一个数组里面找是否存在某个数 | |
* A是数组,在A[p-r]里面找是否存在某个数, 存在返回index,否则返回-1 | |
* | |
*/ | |
int binSearch(int A[], int p, int r, int v) { | |
while (p <= r) { | |
int m = (p+r)/2; | |
if (A[m] < v) { | |
p = m + 1; // 注意点: 在不相等的时候一定要+1/-1,这样可以保证考虑的数组范围在每个循环都减小。因此避免死循环。 | |
} else if (A[m] > v) { | |
r = m - 1; | |
} else { | |
// 找到了 | |
return m; | |
} | |
} | |
return -1; | |
} | |
// 下面是二分查找的变形。需要用到我的方法 | |
// | |
// 方法的关键是引入一个辅助变量mark, 来帮助在每个循环都能够缩小范围, 避免死循环 | |
/* | |
* 变形1: 在数组中找到和v相等的最小的index | |
* Example: | |
* search 2 in [1,2,2,3,3,4], return 1 | |
* search 3 in [1,2,2,3,3,4], return 3 | |
* | |
* | |
* | |
*/ | |
int binSearch(int A[], int p, int r, int v) { | |
int mark = -1; // 首先把mark设为一个不可能的值 | |
// 注意和上面一样,我们考虑的数组范围[p-r], 在每个循环后都减少了,因此避免了死循环 | |
while (p <= r) { | |
int m = (p+r)/2; | |
if (A[m] < v) { | |
p = m + 1; | |
} else if (A[m] > v) { | |
r = m - 1; | |
} else { | |
// 关键的地方。当A[m] == v时,我们怎么办? | |
mark = m; // mark保存这个值。因为m可能是我们需要的index. | |
r = m - 1; // 更新r。因为我们需要返回最小的index. | |
} | |
} | |
return mark; | |
} | |
// 思考: 如果需要返回最大的index应该怎么办? | |
// 变形2: | |
// 对于一个有序数组[1,2,3,4], 我们从中间折断为[1,2]和[3,4], 然后拼接这个 | |
// 折断后的数组为[3,4,1,2]。 | |
// | |
// 现在,假如给一个折断后拼接的数组A, 你返回这个数组最小值的index. | |
// Example: | |
// [3,4,5,1,2] -> return 3 since A[3] = 1 | |
// 假定数组中所有值都不相同(这个面试的时候要问一下面试官,如果存在相同的值的话 | |
// 这个方法不好使了) | |
// | |
// 这题是要我们找小于A[p]的值里面最小的那个。 | |
// | |
// | |
int binSearch(int A[], int p, int r) { | |
int mark = - 1; | |
int base = A[p]; // 参照值 | |
while (p <= r) { | |
int m = (p+r) / 2; | |
if (A[m] < base) { | |
mark = m; // 找到一个,用mark标记 | |
r = m-1; // 我们试试有没有更小的 | |
} else { | |
p = m + 1; | |
} | |
} | |
return mark; | |
} | |
// 思考:上面这个对于在最左边和最右边切断的case有问题。如何解决? | |
// Ans: 对于最左边和最右边切断的case, A仍然是一个单调增的有序数组。 | |
// | |
// 此时,如果使用A[p]为参照值,那么, 最终mark = -1, 因为对于任意x in A[], | |
// 有: x >= A[p] | |
// | |
// 一个想法是改成用A[r]为参照值。 | |
// | |
// 但是这依然会有问题。那就是当len(A) == 2的时候,而且从中间切断 | |
// 比如, sorted array is [1,2], => A[] is [2,1]. In this time, | |
// the array is mono dereasing. If we use A[r] as the comparison value, we have | |
// mark = -1 since for any x in A[], x >= A[r]. | |
// | |
// Personally, I like to compare it with A[r] since the only corner case | |
// is when len(A) == 2. | |
// | |
// (Note, here we should set the base value before the while loop. Suppose | |
// we don't use 'base' but A[r] in the while loop for the case {1,2,0,0,1} | |
// you will find that finally we have A[p] < A[r], i.e., A[p,r] is mono increasing. | |
// Then the assumption is broken) | |
// | |
// | |
// 思考:如果存在duplicate怎么办? | |
// | |
// Ans: We only need to change it a little. The case we need to consider now | |
// is when A{m] == base. | |
// | |
// When A[m] == base, we really don't know how to change value of p and r. | |
// | |
// For example, | |
// A[] = [2,2,3,2,2], | |
// if m == 1, then A[m] == 2, | |
// if m == 3, then A[m] also == 2. | |
// | |
// As a result, when we encounter the case A[m] == base, we need to do a linear | |
// search in the range [p,r]. | |
// | |
// Also, when A[m] == base, | |
// | |
// if mark == -1, we know for all numbers x out of range [p,r] in A[], x > A[n], | |
// so x must not be the position we want. | |
// | |
// if mark != -1, we know mark is the left most position outside the range [p,r], so | |
// that A[mark] < A[n]. | |
// | |
// So besides numbers in range [p,r], when mark != -1, we also need to consider | |
// A[mark] | |
// | |
int binSearch(int num[], int p, int r) { | |
int base = num[r]; | |
int mark = -1; | |
while (p <= r) { | |
int m = (p+r) / 2; | |
if (num[m] < base) { | |
// right | |
mark = m; | |
r = m-1; | |
} else if (num[m] > base) { | |
// left | |
p = m+1; | |
} else { | |
// A[m] == A[r], so we know we can remove A[r] safely since we still | |
// have A[m]. | |
// The corner case is when m == r(only one element in the array), | |
// so we need to update mark. | |
if (mark == -1 || num[mark] > num[r]) mark = r; | |
r--; | |
} | |
} | |
return (mark == -1)?r:mark; | |
} | |
// 思考: m = (p+r)/2 这断代码存在什么可能的问题? | |
// Ans: Possible overflow since r may be very large. | |
// Solution: m = p + (r-p)/2; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
// 思考: 如果需要返回最大的index应该怎么办?
r = m - 1 ==> p = m+1 // means check range [m+1, r] for better potential solution
// 思考:上面这个对于在最左边和最右边切断的case有问题。如何解决?
这种情况是array 就是完全sorted, 在一开始判断 A[0] 和 A[len-1]的大小 如果A[0] < A[len-1](正常sorted array) 直接返回 -1 因为第一个element就是最小的了。 不知道有没有更通用的方法
// 思考:如果存在duplicate怎么办?
如果duplicate 不是首尾字母 貌似不影响 否则就将 r -1?(因为首element还是要用来当base的, 而且末尾element反正相等 不用管,这样也才能判断 切断在哪。)