Skip to content

Instantly share code, notes, and snippets.

@mauro3
Last active August 29, 2015 14:02
Show Gist options
  • Save mauro3/246432bd4f2943e3dfd5 to your computer and use it in GitHub Desktop.
Save mauro3/246432bd4f2943e3dfd5 to your computer and use it in GitHub Desktop.
# Binary and linear search are about equally fast for a haystack of
# size ~38 (Array{I<:Integer}). For Array{Float64} it's about size ~15.
# For arrays of pointers binsearch probably is always faster.
#
# For Array(Int}
# For n=1 to 9, linear search takes about 50% of binary search
#
# For n=10 to 20, linear search takes about 75% of binary search
#
# Now, this chap finds that in C linear search is faster up to array size ~ 64
# http://schani.wordpress.com/2010/04/30/linear-vs-binary-search/
#
#
#
# Range-search is quite slow, not sure why.
# function searchlinear(haystack, needle)
# i = 1
# @inbounds for v in haystack
# if v==needle
# return i
# else
# i += 1
# end
# end
# return 0
# end
function searchlinear(haystack, needle)
# fastest
@inbounds for i =1:length(haystack)
if haystack[i]==needle
return i
end
end
return 0
end
# function searchlinear(haystack, needle)
# i = 1
# @inbounds while i<=length(haystack)
# if haystack[i]==needle
# return i
# else
# i+=1
# end
# end
# return 0
# end
function searchlinear_enum(haystack, needle)
## enumerate is slow!
@inbounds for (i,v) in enumerate(haystack)
if v==needle
return i
end
end
return 0
end
function searchlinear2(v, needle)
# http://schani.wordpress.com/2010/04/30/linear-vs-binary-search/
# only faster for arrays > 80 at which point binary search is faster anyway.
local i::Int = 1
lv = length(v)
if lv<6
while i<=lv
@inbounds begin
if v[i]==needle; return i end
i += 1
end
end
return 0
else
@simd for i=1:6:lv-5
@inbounds begin
if v[i]==needle; return i end
if v[i+1]==needle; return i+1 end
if v[i+2]==needle; return i+2 end
if v[i+3]==needle; return i+3 end
if v[i+4]==needle; return i+4 end
if v[i+5]==needle; return i+5 end
end
end
while i<=lv
@inbounds begin
if v[i]==needle; return i end
i += 1
end
end
return 0
end
end
function searchrange(haystack::Range, needle)
(i,rem) = divrem(needle - first(haystack), step(haystack))
(rem==0 && 1<=i+1<=length(haystack)) ? i+1 : 0
end
function searchrange_faster(haystack::Range, needle)
## faster
di = (needle - first(haystack))/ step(haystack)
i = ifloor(di)
(di-i==0 && 1<=i+1<=length(haystack)) ? i+1 : 0
end
function binarysearch(v::AbstractVector, x)
# Finds the first occurrence of x
lo = 0
hi = length(v)+1
hi2 = hi
@inbounds while lo < hi-1
m = (lo+hi)>>>1
if v[m] < x
lo = m
else
hi = m
end
end
(hi==hi2 || v[hi]!=x) ? 0 : hi
end
function binarysearch(v::AbstractVector, x, lo::Int, hi::Int)
# O(log n). Finds the first occurrence of x
lo = lo-1
hi2 = hi
hi = hi+1
@inbounds while lo < hi-1
m = (lo+hi)>>>1
if v[m] < x
lo = m
else
hi = m
end
end
(hi==length(v)+1 || v[hi]!=x) ? 0 : hi
end
function bins(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += binarysearch(haystack, n)
end
acc
end
function lins(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += searchlinear(haystack, n)
end
acc
end
function lins2(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += searchlinear2(haystack, n)
end
acc
end
function lins_enum(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += searchlinear_enum(haystack, n)
end
acc
end
function rans(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += searchrange(haystack, n)
end
acc
end
function rans_faster(haystack, needles)
acc = zero(eltype(haystack))
for n in needles
acc += searchrange_faster(haystack, n)
end
acc
end
@time 1 # @time warmup
# length of haystack:
n = 103#10^3
fac = 2
# number of needles:
nn = 10^7
haystack = rand(1:fac*n,n);
sort!(haystack);
needles = rand(1:fac*n,nn);
# print("binsearch ")
# a = bins(haystack, [1]) # warm-up
# @time acc1 = bins(haystack, needles)
# if n<=10^3
# print("linear search ")
# a = lins(haystack, [1]) # warm-up
# @time acc2 = lins(haystack, needles)
# assert(acc1==acc2)
# print("linear search 2 ")
# a = lins2(haystack, [1]) # warm-up
# @time acc2 = lins2(haystack, needles)
# assert(acc1==acc2)
# print("linear search enum ")
# a = lins_enum(haystack, [1]) # warm-up
# @time acc2 = lins_enum(haystack, needles)
# assert(acc1==acc2)
# end
# print("search in a range ")
# rhaystack = 1:fac:fac*n;
# a = rans(rhaystack, [1]) # warm-up
# @time acc3 = rans(rhaystack, needles)
# print("search in a range faster ")
# rhaystack = 1:fac:fac*n;
# a = rans_faster(rhaystack, [1]) # warm-up
# @time acc4 = rans_faster(rhaystack, needles)
# assert(acc3==acc4)
a = bins(haystack, [1]) # warm-up
a = lins(haystack, [1]) # warm-up
a = lins2(haystack, [1]) # warm-up
for n in 1:5:50 #int([1:10].^2)
# nn = 10^5
# haystack = BigInt[BigInt(i) for i in rand(1:fac*n,n)]
# needles = BigInt[BigInt(i) for i in rand(1:fac*n,nn)]
haystack = (rand(1:fac*n,n))
needles = (rand(1:fac*n,nn))
sort!(haystack);
println(" ")
println("Haystack = $n")
print("binsearch ")
@time acc1 = bins(haystack, needles)
print("linear search ")
@time acc2 = lins(haystack, needles)
# print("linear search 2 ")
# @time acc3 = lins2(haystack, needles)
# @assert acc1==acc2==acc3
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment