Skip to content

Instantly share code, notes, and snippets.

@ChrisRackauckas
Last active January 29, 2017 15:28
Show Gist options
  • Save ChrisRackauckas/1089987db46443e14ae8b0145ed4c69e to your computer and use it in GitHub Desktop.
Save ChrisRackauckas/1089987db46443e14ae8b0145ed4c69e to your computer and use it in GitHub Desktop.
# Number of elements in each of the benchmark vectors created with `rand(len)`.
len = 10_000
"""
    nosimd1(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using a single fused
loop without `@simd`.

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function nosimd1(x,y,z,w)
    # Iterate over indices that are valid for *all* four arrays. Taking them
    # from `x` alone would let `@inbounds` read or write past the end of a
    # shorter `y`, `z` or `w` without any check.
    @inbounds for i in eachindex(x, y, z, w)
        x[i] += z[i]
        y[i] += w[i]
    end
    return nothing
end
"""
    nosimd2(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using two separate
loops (one per destination array) without `@simd`.

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function nosimd2(x,y,z,w)
    # Compute the shared index set once. It is valid for every array, which is
    # what makes skipping bounds checks with `@inbounds` safe in both loops.
    inds = eachindex(x, y, z, w)
    @inbounds for i in inds
        x[i] += z[i]
    end
    @inbounds for i in inds
        y[i] += w[i]
    end
    return nothing
end
"""
    simd1(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using a single fused
loop annotated with `@simd`.

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function simd1(x,y,z,w)
    # The index set is validated against all four arrays up front, so the
    # unchecked (`@inbounds`) accesses in the loop cannot run past the end of
    # a shorter array.
    inds = eachindex(x, y, z, w)
    @inbounds @simd for i in inds
        x[i] += z[i]
        y[i] += w[i]
    end
    return nothing
end
"""
    simd2(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using two separate
`@simd` loops (one per destination array).

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function simd2(x,y,z,w)
    # One validated index set shared by both loops keeps the `@inbounds`
    # accesses to `y` and `w` as safe as those to `x` and `z`.
    inds = eachindex(x, y, z, w)
    @inbounds @simd for i in inds
        x[i] += z[i]
    end
    @inbounds @simd for i in inds
        y[i] += w[i]
    end
    return nothing
end
using BenchmarkTools
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loop itself rather than untyped global access.
@benchmark nosimd1($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 7.909 μs (0.00% GC)
median time: 7.997 μs (0.00% GC)
mean time: 8.096 μs (0.00% GC)
maximum time: 20.351 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 4
time tolerance: 5.00%
memory tolerance: 1.00%
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loops themselves rather than untyped global access.
@benchmark nosimd2($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 8.122 μs (0.00% GC)
median time: 8.215 μs (0.00% GC)
mean time: 8.353 μs (0.00% GC)
maximum time: 17.972 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 3
time tolerance: 5.00%
memory tolerance: 1.00%
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loop itself rather than untyped global access.
@benchmark simd1($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 8.838 μs (0.00% GC)
median time: 8.993 μs (0.00% GC)
mean time: 9.260 μs (0.00% GC)
maximum time: 45.303 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 3
time tolerance: 5.00%
memory tolerance: 1.00%
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loops themselves rather than untyped global access.
@benchmark simd2($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 8.379 μs (0.00% GC)
median time: 8.461 μs (0.00% GC)
mean time: 8.585 μs (0.00% GC)
maximum time: 18.038 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 3
time tolerance: 5.00%
memory tolerance: 1.00%
"""
    test_inbounds(x, y, z, w, i)

Update a single position `i` in place: add `z[i]` to `x[i]` and `w[i]` to
`y[i]`. Returns the sum `y[i] + w[i]` that was stored into `y[i]`.

The function carries no `@inbounds` of its own; it is marked `@inline` so that
callers can wrap the call in `@inbounds`.
NOTE(review): whether a caller's `@inbounds` actually reaches the indexing
operations in here depends on the Julia version's propagation rules — confirm.
"""
@inline function test_inbounds(x,y,z,w,i)
    x[i] = x[i] + z[i]
    updated = y[i] + w[i]
    y[i] = updated
    return updated
end
"""
    nosimd3(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, by calling
`test_inbounds` for every index inside an `@inbounds` loop.

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function nosimd3(x,y,z,w)
    # Validate the index set against every array before entering the
    # `@inbounds` region, so elided bounds checks can never hide an
    # out-of-range access to `y`, `z` or `w`.
    inds = eachindex(x, y, z, w)
    # NOTE(review): `@inbounds` is applied at the call site only; whether it
    # reaches the indexing inside `test_inbounds` depends on the Julia
    # version's inbounds-propagation rules — confirm.
    @inbounds for i in inds
        test_inbounds(x,y,z,w,i)
    end
    return nothing
end
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loop itself rather than untyped global access.
@benchmark nosimd3($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 8.260 μs (0.00% GC)
median time: 8.324 μs (0.00% GC)
mean time: 8.446 μs (0.00% GC)
maximum time: 19.281 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 3
time tolerance: 5.00%
memory tolerance: 1.00%
"""
    nosimd4(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, by calling
`test_inbounds` for every index of `x`. No `@inbounds`, `@simd` or `@fastmath`
annotation is applied to the loop. Returns `nothing`.
"""
function nosimd4(x,y,z,w)
    for idx = eachindex(x)
        test_inbounds(x, y, z, w, idx)
    end
    return nothing
end
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loop itself rather than untyped global access.
@benchmark nosimd4($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 18.800 μs (0.00% GC)
median time: 18.992 μs (0.00% GC)
mean time: 19.449 μs (0.00% GC)
maximum time: 49.651 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 1
time tolerance: 5.00%
memory tolerance: 1.00%
"""
    nosimd5(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, by calling
`test_inbounds` for every index inside a loop annotated with both `@inbounds`
and `@fastmath`.

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function nosimd5(x,y,z,w)
    # Validate the index set against every array before entering the
    # `@inbounds` region, so elided bounds checks can never hide an
    # out-of-range access to `y`, `z` or `w`.
    inds = eachindex(x, y, z, w)
    # NOTE(review): the loop body contains only a function call, so `@fastmath`
    # presumably has no arithmetic to rewrite here (the additions live inside
    # `test_inbounds`) — verify against the `@fastmath` documentation.
    @inbounds @fastmath for i in inds
        test_inbounds(x,y,z,w,i)
    end
    return nothing
end
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loop itself rather than untyped global access.
@benchmark nosimd5($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 7.833 μs (0.00% GC)
median time: 7.919 μs (0.00% GC)
mean time: 7.997 μs (0.00% GC)
maximum time: 15.480 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 4
time tolerance: 5.00%
memory tolerance: 1.00%
"""
    simd3(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using two separate loops
annotated with `@inbounds` and `@fastmath` (despite the name, no `@simd`
annotation is used).

All four arrays must share the same indices; `eachindex(x, y, z, w)` raises a
`DimensionMismatch` otherwise, before any element is modified. Returns
`nothing`.
"""
function simd3(x,y,z,w)
    # One validated index set shared by both loops keeps the `@inbounds`
    # accesses to `y` and `w` as safe as those to `x` and `z`.
    inds = eachindex(x, y, z, w)
    @inbounds @fastmath for i in inds
        x[i] += z[i]
    end
    @inbounds @fastmath for i in inds
        y[i] += w[i]
    end
    return nothing
end
# Reseed and regenerate the inputs so this benchmark starts from the same data
# as the others.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `x`, `y`, `z`, `w` are non-constant globals: interpolate them with `$` so the
# measurement covers the loops themselves rather than untyped global access.
@benchmark simd3($x,$y,$z,$w)
BenchmarkTools.Trial:
memory estimate: 0.00 bytes
allocs estimate: 0
--------------
minimum time: 9.557 μs (0.00% GC)
median time: 10.067 μs (0.00% GC)
mean time: 11.115 μs (0.00% GC)
maximum time: 23.846 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 3
time tolerance: 5.00%
memory tolerance: 1.00%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment