Last active
January 29, 2017 15:28
-
-
Save ChrisRackauckas/1089987db46443e14ae8b0145ed4c69e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of elements in each benchmark input vector.
len = 10_000
"""
    nosimd1(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using a single fused
loop without `@simd`.

The loop runs over `eachindex(x)` with bounds checks disabled, so those
indices are validated against `y`, `z` and `w` before the loop starts.
Arrays longer than `x` are accepted; only the indices of `x` are touched.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function nosimd1(x, y, z, w)
    inds = eachindex(x)
    # `@inbounds` below is only sound if every index of `x` is valid for the
    # other three arrays, so check once up front instead of per iteration.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds for i in inds
        x[i] += z[i]
        y[i] += w[i]
    end
    return nothing
end
"""
    nosimd2(x, y, z, w)

Add `z` to `x` and then `w` to `y` element-wise, in place, using two separate
loops without `@simd`.

Both loops run over `eachindex(x)` with bounds checks disabled, so those
indices are validated against `y`, `z` and `w` before any element is modified.
Arrays longer than `x` are accepted; only the indices of `x` are touched.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function nosimd2(x, y, z, w)
    inds = eachindex(x)
    # Validate once so the `@inbounds` loops below cannot index out of range.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds for i in inds
        x[i] += z[i]
    end
    @inbounds for i in inds
        y[i] += w[i]
    end
    return nothing
end
"""
    simd1(x, y, z, w)

Add `z` to `x` and `w` to `y` element-wise, in place, using a single fused
loop annotated with `@simd`.

The loop runs over `eachindex(x)` with bounds checks disabled, so those
indices are validated against `y`, `z` and `w` before the loop starts.
Arrays longer than `x` are accepted; only the indices of `x` are touched.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function simd1(x, y, z, w)
    inds = eachindex(x)
    # `@inbounds` below is only sound if every index of `x` is valid for the
    # other three arrays, so check once up front instead of per iteration.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds @simd for i in inds
        x[i] += z[i]
        y[i] += w[i]
    end
    return nothing
end
"""
    simd2(x, y, z, w)

Add `z` to `x` and then `w` to `y` element-wise, in place, using two separate
loops that are each annotated with `@simd`.

Both loops run over `eachindex(x)` with bounds checks disabled, so those
indices are validated against `y`, `z` and `w` before any element is modified.
Arrays longer than `x` are accepted; only the indices of `x` are touched.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function simd2(x, y, z, w)
    inds = eachindex(x)
    # Validate once so the `@inbounds` loops below cannot index out of range.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds @simd for i in inds
        x[i] += z[i]
    end
    @inbounds @simd for i in inds
        y[i] += w[i]
    end
    return nothing
end
using BenchmarkTools

# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark nosimd1($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 7.909 μs (0.00% GC) | |
median time: 7.997 μs (0.00% GC) | |
mean time: 8.096 μs (0.00% GC) | |
maximum time: 20.351 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 4 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark nosimd2($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 8.122 μs (0.00% GC) | |
median time: 8.215 μs (0.00% GC) | |
mean time: 8.353 μs (0.00% GC) | |
maximum time: 17.972 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 3 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark simd1($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 8.838 μs (0.00% GC) | |
median time: 8.993 μs (0.00% GC) | |
mean time: 9.260 μs (0.00% GC) | |
maximum time: 45.303 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 3 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark simd2($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 8.379 μs (0.00% GC) | |
median time: 8.461 μs (0.00% GC) | |
mean time: 8.585 μs (0.00% GC) | |
maximum time: 18.038 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 3 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
"""
    test_inbounds(x, y, z, w, i)

Update a single position `i` in place: add `z[i]` to `x[i]` and `w[i]` to
`y[i]`. The value of the second sum is returned.

The body carries no `@inbounds` of its own; the function is only marked
`@inline`.
"""
@inline function test_inbounds(x, y, z, w, i)
    x[i] = x[i] + z[i]
    return y[i] = y[i] + w[i]
end
"""
    nosimd3(x, y, z, w)

Apply `test_inbounds` at every index of `x`, with the loop wrapped in
`@inbounds` and no `@simd`.

Because the loop may run with bounds checks disabled, the indices of `x` are
validated against `y`, `z` and `w` before the loop starts. Arrays longer than
`x` are accepted.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function nosimd3(x, y, z, w)
    inds = eachindex(x)
    # NOTE(review): whether `@inbounds` actually reaches the indexing inside the
    # inlined helper depends on the Julia version's propagation rules. Validate
    # up front so the loop is safe either way.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds for i in inds
        test_inbounds(x, y, z, w, i)
    end
    return nothing
end
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark nosimd3($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 8.260 μs (0.00% GC) | |
median time: 8.324 μs (0.00% GC) | |
mean time: 8.446 μs (0.00% GC) | |
maximum time: 19.281 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 3 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
"""
    nosimd4(x, y, z, w)

Apply `test_inbounds` at every index of `x` using a plain loop: no
`@inbounds`, no `@simd`, no `@fastmath`.

Returns `nothing`.
"""
function nosimd4(x, y, z, w)
    for idx in eachindex(x)
        test_inbounds(x, y, z, w, idx)
    end
    return nothing
end
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark nosimd4($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 18.800 μs (0.00% GC) | |
median time: 18.992 μs (0.00% GC) | |
mean time: 19.449 μs (0.00% GC) | |
maximum time: 49.651 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
"""
    nosimd5(x, y, z, w)

Apply `test_inbounds` at every index of `x`, with the loop wrapped in
`@inbounds @fastmath` and no `@simd`.

Because the loop may run with bounds checks disabled, the indices of `x` are
validated against `y`, `z` and `w` before the loop starts. Arrays longer than
`x` are accepted.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function nosimd5(x, y, z, w)
    inds = eachindex(x)
    # Validate once so the `@inbounds` loop below cannot index out of range.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    # NOTE(review): `@fastmath` rewrites the expression it wraps, and the loop
    # body here is only a call to the helper, so it presumably does not affect
    # the additions performed inside `test_inbounds`. Confirm.
    @inbounds @fastmath for i in inds
        test_inbounds(x, y, z, w, i)
    end
    return nothing
end
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark nosimd5($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 7.833 μs (0.00% GC) | |
median time: 7.919 μs (0.00% GC) | |
mean time: 7.997 μs (0.00% GC) | |
maximum time: 15.480 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 4 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% | |
"""
    simd3(x, y, z, w)

Add `z` to `x` and then `w` to `y` element-wise, in place, using two separate
loops that are each wrapped in `@inbounds @fastmath`. Despite the name, no
`@simd` annotation is used here.

Both loops run over `eachindex(x)` with bounds checks disabled, so those
indices are validated against `y`, `z` and `w` before any element is modified.
Arrays longer than `x` are accepted; only the indices of `x` are touched.

# Throws
- `BoundsError` if some index of `x` is not a valid index of `y`, `z` or `w`.

Returns `nothing`.
"""
function simd3(x, y, z, w)
    inds = eachindex(x)
    # Validate once so the `@inbounds` loops below cannot index out of range.
    checkbounds(y, inds)
    checkbounds(z, inds)
    checkbounds(w, inds)
    @inbounds @fastmath for i in inds
        x[i] += z[i]
    end
    @inbounds @fastmath for i in inds
        y[i] += w[i]
    end
    return nothing
end
# Seed the RNG and build the four input vectors for this run.
srand(100)
x = rand(len)
y = rand(len)
z = rand(len)
w = rand(len)
# `$` interpolates the global arrays into the benchmark so the timing does not
# include untyped global-variable access.
@benchmark simd3($x, $y, $z, $w)
BenchmarkTools.Trial: | |
memory estimate: 0.00 bytes | |
allocs estimate: 0 | |
-------------- | |
minimum time: 9.557 μs (0.00% GC) | |
median time: 10.067 μs (0.00% GC) | |
mean time: 11.115 μs (0.00% GC) | |
maximum time: 23.846 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 3 | |
time tolerance: 5.00% | |
memory tolerance: 1.00% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment