Last active
August 29, 2015 14:02
-
-
Save mauro3/246432bd4f2943e3dfd5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Binary and linear search are about equally fast for a haystack of | |
# size ~38 (Array{I<:Integer}). For Array{Float64} it's about size ~15. | |
# For arrays of pointers binsearch probably is always faster. | |
# | |
# For Array(Int} | |
# For n=1 to 9, linear search takes about 50% of binary search | |
# | |
# For n=10 to 20, linear search takes about 75% of binary search | |
# | |
# Now, this chap finds that in C linear search is faster up to array size ~ 64 | |
# http://schani.wordpress.com/2010/04/30/linear-vs-binary-search/ | |
# | |
# | |
# | |
# Range-search is quite slow, not sure why. | |
# function searchlinear(haystack, needle) | |
# i = 1 | |
# @inbounds for v in haystack | |
# if v==needle | |
# return i | |
# else | |
# i += 1 | |
# end | |
# end | |
# return 0 | |
# end | |
function searchlinear(haystack, needle) | |
# fastest | |
@inbounds for i =1:length(haystack) | |
if haystack[i]==needle | |
return i | |
end | |
end | |
return 0 | |
end | |
# function searchlinear(haystack, needle) | |
# i = 1 | |
# @inbounds while i<=length(haystack) | |
# if haystack[i]==needle | |
# return i | |
# else | |
# i+=1 | |
# end | |
# end | |
# return 0 | |
# end | |
function searchlinear_enum(haystack, needle) | |
## enumerate is slow! | |
@inbounds for (i,v) in enumerate(haystack) | |
if v==needle | |
return i | |
end | |
end | |
return 0 | |
end | |
function searchlinear2(v, needle) | |
# http://schani.wordpress.com/2010/04/30/linear-vs-binary-search/ | |
# only faster for arrays > 80 at which point binary search is faster anyway. | |
local i::Int = 1 | |
lv = length(v) | |
if lv<6 | |
while i<=lv | |
@inbounds begin | |
if v[i]==needle; return i end | |
i += 1 | |
end | |
end | |
return 0 | |
else | |
@simd for i=1:6:lv-5 | |
@inbounds begin | |
if v[i]==needle; return i end | |
if v[i+1]==needle; return i+1 end | |
if v[i+2]==needle; return i+2 end | |
if v[i+3]==needle; return i+3 end | |
if v[i+4]==needle; return i+4 end | |
if v[i+5]==needle; return i+5 end | |
end | |
end | |
while i<=lv | |
@inbounds begin | |
if v[i]==needle; return i end | |
i += 1 | |
end | |
end | |
return 0 | |
end | |
end | |
function searchrange(haystack::Range, needle) | |
(i,rem) = divrem(needle - first(haystack), step(haystack)) | |
(rem==0 && 1<=i+1<=length(haystack)) ? i+1 : 0 | |
end | |
function searchrange_faster(haystack::Range, needle) | |
## faster | |
di = (needle - first(haystack))/ step(haystack) | |
i = ifloor(di) | |
(di-i==0 && 1<=i+1<=length(haystack)) ? i+1 : 0 | |
end | |
function binarysearch(v::AbstractVector, x) | |
# Finds the first occurrence of x | |
lo = 0 | |
hi = length(v)+1 | |
hi2 = hi | |
@inbounds while lo < hi-1 | |
m = (lo+hi)>>>1 | |
if v[m] < x | |
lo = m | |
else | |
hi = m | |
end | |
end | |
(hi==hi2 || v[hi]!=x) ? 0 : hi | |
end | |
function binarysearch(v::AbstractVector, x, lo::Int, hi::Int) | |
# O(log n). Finds the first occurrence of x | |
lo = lo-1 | |
hi2 = hi | |
hi = hi+1 | |
@inbounds while lo < hi-1 | |
m = (lo+hi)>>>1 | |
if v[m] < x | |
lo = m | |
else | |
hi = m | |
end | |
end | |
(hi==length(v)+1 || v[hi]!=x) ? 0 : hi | |
end | |
function bins(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += binarysearch(haystack, n) | |
end | |
acc | |
end | |
function lins(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += searchlinear(haystack, n) | |
end | |
acc | |
end | |
function lins2(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += searchlinear2(haystack, n) | |
end | |
acc | |
end | |
function lins_enum(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += searchlinear_enum(haystack, n) | |
end | |
acc | |
end | |
function rans(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += searchrange(haystack, n) | |
end | |
acc | |
end | |
function rans_faster(haystack, needles) | |
acc = zero(eltype(haystack)) | |
for n in needles | |
acc += searchrange_faster(haystack, n) | |
end | |
acc | |
end | |
@time 1 # @time warmup | |
# length of haystack: | |
n = 103#10^3 | |
fac = 2 | |
# number of needles: | |
nn = 10^7 | |
haystack = rand(1:fac*n,n); | |
sort!(haystack); | |
needles = rand(1:fac*n,nn); | |
# print("binsearch ") | |
# a = bins(haystack, [1]) # warm-up | |
# @time acc1 = bins(haystack, needles) | |
# if n<=10^3 | |
# print("linear search ") | |
# a = lins(haystack, [1]) # warm-up | |
# @time acc2 = lins(haystack, needles) | |
# assert(acc1==acc2) | |
# print("linear search 2 ") | |
# a = lins2(haystack, [1]) # warm-up | |
# @time acc2 = lins2(haystack, needles) | |
# assert(acc1==acc2) | |
# print("linear search enum ") | |
# a = lins_enum(haystack, [1]) # warm-up | |
# @time acc2 = lins_enum(haystack, needles) | |
# assert(acc1==acc2) | |
# end | |
# print("search in a range ") | |
# rhaystack = 1:fac:fac*n; | |
# a = rans(rhaystack, [1]) # warm-up | |
# @time acc3 = rans(rhaystack, needles) | |
# print("search in a range faster ") | |
# rhaystack = 1:fac:fac*n; | |
# a = rans_faster(rhaystack, [1]) # warm-up | |
# @time acc4 = rans_faster(rhaystack, needles) | |
# assert(acc3==acc4) | |
a = bins(haystack, [1]) # warm-up | |
a = lins(haystack, [1]) # warm-up | |
a = lins2(haystack, [1]) # warm-up | |
for n in 1:5:50 #int([1:10].^2) | |
# nn = 10^5 | |
# haystack = BigInt[BigInt(i) for i in rand(1:fac*n,n)] | |
# needles = BigInt[BigInt(i) for i in rand(1:fac*n,nn)] | |
haystack = (rand(1:fac*n,n)) | |
needles = (rand(1:fac*n,nn)) | |
sort!(haystack); | |
println(" ") | |
println("Haystack = $n") | |
print("binsearch ") | |
@time acc1 = bins(haystack, needles) | |
print("linear search ") | |
@time acc2 = lins(haystack, needles) | |
# print("linear search 2 ") | |
# @time acc3 = lins2(haystack, needles) | |
# @assert acc1==acc2==acc3 | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment