Last active
December 18, 2015 09:29
-
-
Save joshuawscott/5761300 to your computer and use it in GitHub Desktop.
Optimized Binary Search for a sorted array - returns the index of the element. 6x faster than Array#bsearch for arrays of strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Array | |
# Returns the index number of a matching element. | |
# The returned element is not guaranteed. | |
# target must implement #<=> | |
# example: | |
# arr = [2,3,5,7,11,13,17,19,23,29] | |
# arr.bsearch_index(2) => 0 | |
# arr.bsearch_index(23) => 8 | |
# arr.bsearch_index(10) => nil | |
# nil always is nil: | |
# arr = ['a','b',nil] | |
# arr.bsearch_index(nil) => nil | |
def bsearch_index(target) | |
low = 0; high = length - 1 | |
until low > high | |
mid = low + ((high - low) / 2) | |
cmp = target.<=>(self[mid]) | |
if cmp == 1 | |
high = mid - 1 | |
elsif cmp == -1 | |
low = mid + 1 | |
elsif cmp == 0 | |
return mid | |
else | |
return nil | |
end | |
end | |
return nil | |
end | |
end | |
require 'benchmark' | |
sparse_array = [] #few dups | |
1_000_000.times { sparse_array << rand(100_000_000) } | |
sparse_array.sort! | |
mid_array = [] # many dups | |
1_000_000.times { mid_array << rand(1_000_000) } | |
mid_array.sort! | |
dense_array = [] # all dups | |
1_000_000.times { dense_array << rand(1_000) } | |
dense_array.sort! | |
sparse_search_array = [] | |
mid_search_array = [] | |
dense_search_array = [] | |
500_000.times { sparse_search_array << sparse_array[rand(1_000_000)] ; sparse_search_array << rand(100_000_000) } | |
500_000.times { mid_search_array << mid_array[rand(1_000_000)] ; mid_search_array << rand(100_000_000) } | |
500_000.times { dense_search_array << dense_array[rand(1_000_000)] ; dense_search_array << rand(100_000_000) } | |
string_array = ('aaaa'..'zzzz').to_a | |
Benchmark.bmbm do |x| | |
x.report("bsearch_index sparse") { 1_000_000.times { |i| sparse_array.bsearch_index(sparse_search_array[i]) } } | |
x.report("bsearch any sparse") { 1_000_000.times { |i| sparse_array.bsearch { |e| e <=> sparse_search_array[i]} } } | |
x.report("bsearch min sparse") { 1_000_000.times { |i| sparse_array.bsearch { |e| e == sparse_search_array[i]} } } | |
x.report("bsearch_index mid") { 1_000_000.times { |i| mid_array.bsearch_index(mid_search_array[i]) } } | |
x.report("bsearch any mid") { 1_000_000.times { |i| mid_array.bsearch { |e| e <=> mid_search_array[i]} } } | |
x.report("bsearch min mid") { 1_000_000.times { |i| mid_array.bsearch { |e| e == mid_search_array[i]} } } | |
x.report("bsearch_index dense") { 1_000_000.times { |i| dense_array.bsearch_index(dense_search_array[i]) } } | |
x.report("bsearch any dense") { 1_000_000.times { |i| dense_array.bsearch { |e| e <=> dense_search_array[i]} } } | |
x.report("bsearch min dense") { 1_000_000.times { |i| dense_array.bsearch { |e| e == dense_search_array[i]} } } | |
x.report("bsearch_index strings") { 1_000_000.times { |i| dense_array.bsearch_index(string_array[i]) } } | |
x.report("bsearch any strings") { 1_000_000.times { |i| dense_array.bsearch { |e| e <=> string_array[i]} } } | |
x.report("bsearch min strings") { 1_000_000.times { |i| dense_array.bsearch { |e| e == string_array[i]} } }end | |
# Returns: | |
user system total real | |
bsearch_index sparse 3.430000 0.000000 3.430000 ( 3.437059) | |
bsearch any sparse 1.950000 0.000000 1.950000 ( 1.957959) | |
bsearch min sparse 1.440000 0.000000 1.440000 ( 1.442488) | |
bsearch_index mid 3.130000 0.010000 3.140000 ( 3.126290) | |
bsearch any mid 1.990000 0.000000 1.990000 ( 1.993189) | |
bsearch min mid 1.460000 0.000000 1.460000 ( 1.459362) | |
bsearch_index dense 2.280000 0.000000 2.280000 ( 2.284337) | |
bsearch any dense 2.000000 0.000000 2.000000 ( 2.001197) | |
bsearch min dense 1.450000 0.000000 1.450000 ( 1.452665) | |
bsearch_index strings 0.560000 0.000000 0.560000 ( 0.561603) | |
bsearch any strings 3.040000 0.000000 3.040000 ( 3.039882) | |
bsearch min strings 3.450000 0.000000 3.450000 ( 3.454454) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'test/unit' | |
class TestBsearchIndex < Test::Unit::TestCase | |
def test_empty_array | |
assert_nil([].bsearch_index('myString')) | |
end | |
def test_single_element_matching_array | |
assert_equal(0, ['myString'].bsearch_index('myString')) | |
end | |
def test_single_element_nonmatching_array | |
assert_nil(['myString'].bsearch_index('Other String')) | |
end | |
def test_case_sensitive | |
assert_nil(['mystring'].bsearch_index('MyString')) | |
end | |
def test_longer_array | |
array = %w{0 a aa aaa ab abb b d q y} | |
assert_equal(0, array.bsearch_index('0')) | |
assert_equal(9, array.bsearch_index('y')) | |
assert_equal(1, array.bsearch_index('a')) | |
assert_equal(6, array.bsearch_index('b')) | |
assert_nil(array.bsearch_index('1')) | |
assert_nil(array.bsearch_index('z')) | |
end | |
def test_integer_array | |
array = [2,3,5,7,11,13,17,23] | |
assert_equal 0, array.bsearch_index(2) | |
assert_equal 7, array.bsearch_index(23) | |
assert_nil array.bsearch_index(1) | |
assert_nil array.bsearch_index(8) | |
end | |
def test_nil | |
array = ["01", "01", "02", "02", "03", "03", "04", "04", "05", "05", "07", "07", "08", "08", "09", "09", "10", "10", "11", "11", "12", "12", "13", "13", "14", "14", nil, nil] | |
assert_nil array.bsearch_index(nil) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment