Last active
June 6, 2017 13:20
-
-
Save bjarthur/9ae4b30c4087dcd229d9 to your computer and use it in GitHub Desktop.
extrema implementations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Base.Cartesian, Base.Test, BenchmarkTools | |
extrema_mapslices(v,region) = mapslices(extrema, v, region) | |
function extrema_functor(a::AbstractArray, dim) | |
dim = tuple(dim...) | |
mi = minimum(a,dim) | |
ma = maximum(a,dim) | |
reshape(collect(zip(mi,ma)), size(mi)) | |
end | |
function extrema_cartesian(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesian!(B, A) | |
end | |
function extrema_cartesian_simd(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesian_simd!(B, A) | |
end | |
function extrema_cartesianrange(A::AbstractArray, dims) | |
sz = [size(A)...] | |
sz[[dims...]] = 1 | |
B = Array{Tuple{eltype(A),eltype(A)}}(sz...) | |
extrema_cartesianrange!(B, A) | |
end | |
@generated function extrema_cartesian!(B,A::Array{T,N}) where {T,N} | |
return quote | |
sA = size(A) | |
sB = size(B) | |
@nloops $N i B begin | |
AI = @nref $N A i | |
(@nref $N B i) = (AI, AI) | |
end | |
Bmax = sB | |
@inbounds @nloops $N i A begin | |
AI = @nref $N A i | |
@nexprs $N d->(j_d = min(Bmax[d], i_{d})) | |
BJ = @nref $N B j | |
#if AI < BJ[1] | |
# (@nref $N B j) = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# (@nref $N B j) = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
(@nref $N B j) = BJ | |
end | |
return B | |
end | |
end | |
@generated function extrema_cartesian_simd!(B,A::Array{T,N}) where {T,N} | |
return quote | |
sA = size(A) | |
sB = size(B) | |
@nloops $N i B begin | |
AI = @nref $N A i | |
(@nref $N B i) = (AI, AI) | |
end | |
Bmax = sB | |
@inbounds @nloops $(N-1) i d -> indices(A,d+1) begin | |
@inbounds @simd for i_0 = indices(A,1) | |
AI = @nref $N A d->i_{d-1} | |
@nexprs $N d->(j_d = min(Bmax[d], i_{d-1})) | |
BJ = @nref $N B j | |
#if AI < BJ[1] | |
# (@nref $N B j) = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# (@nref $N B j) = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
(@nref $N B j) = BJ | |
end | |
end | |
return B | |
end | |
end | |
@noinline function extrema_cartesianrange!(B, A) | |
sA = size(A) | |
sB = size(B) | |
for I in CartesianRange(sB) | |
AI = A[I] | |
B[I] = (AI, AI) | |
end | |
Bmax = CartesianIndex(sB) | |
@inbounds @simd for I in CartesianRange(sA) | |
J = min(Bmax,I) | |
BJ = B[J] | |
AI = A[I] | |
#if AI < BJ[1] | |
# B[J] = (AI, BJ[2]) | |
#elseif AI > BJ[2] | |
# B[J] = (BJ[1], AI) | |
#end | |
BJ = ifelse(AI < BJ[1], (AI, BJ[2]), BJ) | |
BJ = ifelse(AI > BJ[2], (BJ[1], AI), BJ) | |
B[J] = BJ | |
end | |
return B | |
end | |
foo = Dict( | |
10 => rand(10,11,12), | |
100 => rand(100,101,102), | |
1000 => rand(1000,1001,1002)) | |
global dim, sz | |
for dim in [1,2,(1,2)], sz in sort(collect(keys(foo))) | |
info("dim=",dim,", sz=",sz) | |
@test extrema_mapslices(foo[sz],dim) == extrema_functor(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesian_simd(foo[sz],dim) | |
@test extrema_mapslices(foo[sz],dim) == extrema_cartesianrange(foo[sz],dim) | |
gc(); print("mapslices "); @btime extrema_mapslices(foo[sz],dim); | |
gc(); print("functor "); @btime extrema_functor(foo[sz],dim); | |
gc(); print("cartesian "); @btime extrema_cartesian(foo[sz],dim); | |
gc(); print("cartesian_simd"); @btime extrema_cartesian_simd(foo[sz],dim); | |
gc(); print("cartesianrange"); @btime extrema_cartesianrange(foo[sz],dim); | |
end |
on a 0-day master i now (a year later on julia 0.7) get these timings:
INFO: dim=1, sz=10
mapslices 120.681 μs (1621 allocations: 74.69 KiB)
functor 9.574 μs (22 allocations: 5.09 KiB)
cartesian 3.039 μs (4 allocations: 2.50 KiB)
cartesianrange 6.894 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100
mapslices 13.118 ms (123662 allocations: 12.74 MiB)
functor 4.326 ms (25 allocations: 322.86 KiB)
cartesian 2.339 ms (5 allocations: 161.39 KiB)
cartesianrange 5.556 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000
mapslices 7.641 s (13018533 allocations: 7.98 GiB)
functor 3.531 s (25 allocations: 30.61 MiB)
cartesian 1.507 s (10 allocations: 15.31 MiB)
cartesianrange 4.486 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10
mapslices 118.024 μs (1477 allocations: 69.98 KiB)
functor 5.627 μs (22 allocations: 4.73 KiB)
cartesian 2.756 μs (4 allocations: 2.30 KiB)
cartesianrange 6.737 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100
mapslices 14.129 ms (122438 allocations: 12.61 MiB)
functor 1.695 ms (25 allocations: 319.61 KiB)
cartesian 2.271 ms (5 allocations: 159.77 KiB)
cartesianrange 5.460 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000
mapslices 10.170 s (13005016 allocations: 8.03 GiB)
functor 1.489 s (25 allocations: 30.58 MiB)
cartesian 1.478 s (10 allocations: 15.29 MiB)
cartesianrange 4.425 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10
mapslices 18.661 μs (172 allocations: 17.50 KiB)
functor 5.925 μs (20 allocations: 1.17 KiB)
cartesian 2.470 μs (3 allocations: 544 bytes)
cartesianrange 6.443 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100
mapslices 4.729 ms (1264 allocations: 7.90 MiB)
functor 3.237 ms (20 allocations: 4.02 KiB)
cartesian 1.445 ms (3 allocations: 1.97 KiB)
cartesianrange 4.426 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000
mapslices 6.231 s (12555 allocations: 7.47 GiB)
functor 3.252 s (20 allocations: 32.31 KiB)
cartesian 1.402 s (7 allocations: 16.08 KiB)
cartesianrange 4.638 s (7 allocations: 16.08 KiB)
adding @inbounds @simd
to the CartesianRange
variant resulted in a performance equal to Cartesian
. thanks tim!
INFO: dim=1, sz=10
mapslices 123.829 μs (1621 allocations: 74.69 KiB)
functor 9.804 μs (22 allocations: 5.09 KiB)
cartesian 2.978 μs (4 allocations: 2.50 KiB)
cartesianrange 3.228 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100
mapslices 13.223 ms (123662 allocations: 12.74 MiB)
functor 4.337 ms (25 allocations: 322.86 KiB)
cartesian 2.366 ms (5 allocations: 161.39 KiB)
cartesianrange 2.321 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000
mapslices 6.598 s (13018533 allocations: 7.98 GiB)
functor 3.411 s (25 allocations: 30.61 MiB)
cartesian 1.503 s (10 allocations: 15.31 MiB)
cartesianrange 1.410 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10
mapslices 120.972 μs (1477 allocations: 69.98 KiB)
functor 5.656 μs (22 allocations: 4.73 KiB)
cartesian 2.746 μs (4 allocations: 2.30 KiB)
cartesianrange 3.051 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100
mapslices 14.839 ms (122438 allocations: 12.61 MiB)
functor 1.754 ms (25 allocations: 319.61 KiB)
cartesian 2.277 ms (5 allocations: 159.77 KiB)
cartesianrange 2.268 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000
mapslices 10.981 s (13005016 allocations: 8.03 GiB)
functor 1.520 s (25 allocations: 30.58 MiB)
cartesian 1.479 s (10 allocations: 15.29 MiB)
cartesianrange 1.379 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10
mapslices 18.748 μs (172 allocations: 17.50 KiB)
functor 5.890 μs (20 allocations: 1.17 KiB)
cartesian 2.534 μs (3 allocations: 544 bytes)
cartesianrange 2.703 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100
mapslices 4.835 ms (1264 allocations: 7.90 MiB)
functor 3.306 ms (20 allocations: 4.02 KiB)
cartesian 1.393 ms (3 allocations: 1.97 KiB)
cartesianrange 1.337 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000
mapslices 6.932 s (12555 allocations: 7.47 GiB)
functor 3.257 s (20 allocations: 32.31 KiB)
cartesian 1.338 s (7 allocations: 16.08 KiB)
cartesianrange 1.212 s (7 allocations: 16.08 KiB)
using ifelse
is faster when dim=2
but slower otherwise. it also makes the simd and non-simd variants of cartesian equal in speed for small arrays:
%%% if ... elseif ... end %%% ifelse
INFO: dim=1, sz=10 INFO: dim=1, sz=10
mapslices 122.846 μs (1621 allocations: 74.69 KiB) mapslices 126.879 μs (1621 allocations: 74.69 KiB)
functor 9.191 μs (22 allocations: 5.09 KiB) functor 10.858 μs (22 allocations: 5.09 KiB)
cartesian 2.878 μs (4 allocations: 2.50 KiB) cartesian 3.068 μs (4 allocations: 2.50 KiB)
cartesian_simd 5.463 μs (4 allocations: 2.50 KiB) cartesian_simd 3.117 μs (4 allocations: 2.50 KiB)
cartesianrange 3.227 μs (4 allocations: 2.50 KiB) cartesianrange 3.354 μs (4 allocations: 2.50 KiB)
INFO: dim=1, sz=100 INFO: dim=1, sz=100
mapslices 13.213 ms (123662 allocations: 12.74 MiB) mapslices 14.398 ms (123662 allocations: 12.74 MiB)
functor 4.311 ms (25 allocations: 322.86 KiB) functor 4.357 ms (25 allocations: 322.86 KiB)
cartesian 2.290 ms (5 allocations: 161.39 KiB) cartesian 2.437 ms (5 allocations: 161.39 KiB)
cartesian_simd 2.236 ms (5 allocations: 161.39 KiB) cartesian_simd 2.439 ms (5 allocations: 161.39 KiB)
cartesianrange 2.242 ms (5 allocations: 161.39 KiB) cartesianrange 2.460 ms (5 allocations: 161.39 KiB)
INFO: dim=1, sz=1000 INFO: dim=1, sz=1000
mapslices 8.208 s (13018533 allocations: 7.98 GiB) mapslices 8.566 s (13018533 allocations: 7.98 GiB)
functor 4.036 s (25 allocations: 30.61 MiB) functor 4.599 s (25 allocations: 30.61 MiB)
cartesian 1.796 s (10 allocations: 15.31 MiB) cartesian 3.023 s (10 allocations: 15.31 MiB)
cartesian_simd 1.790 s (10 allocations: 15.31 MiB) cartesian_simd 2.878 s (10 allocations: 15.31 MiB)
cartesianrange 1.699 s (10 allocations: 15.31 MiB) cartesianrange 2.981 s (10 allocations: 15.31 MiB)
INFO: dim=2, sz=10 INFO: dim=2, sz=10
mapslices 122.202 μs (1477 allocations: 69.98 KiB) mapslices 126.456 μs (1477 allocations: 69.98 KiB)
functor 5.644 μs (22 allocations: 4.73 KiB) functor 5.885 μs (22 allocations: 4.73 KiB)
cartesian 2.850 μs (4 allocations: 2.30 KiB) cartesian 2.669 μs (4 allocations: 2.30 KiB)
cartesian_simd 5.388 μs (4 allocations: 2.30 KiB) cartesian_simd 2.690 μs (4 allocations: 2.30 KiB)
cartesianrange 3.053 μs (4 allocations: 2.30 KiB) cartesianrange 2.641 μs (4 allocations: 2.30 KiB)
INFO: dim=2, sz=100 INFO: dim=2, sz=100
mapslices 15.031 ms (122438 allocations: 12.61 MiB) mapslices 15.137 ms (122438 allocations: 12.61 MiB)
functor 1.617 ms (25 allocations: 319.61 KiB) functor 1.662 ms (25 allocations: 319.61 KiB)
cartesian 2.205 ms (5 allocations: 159.77 KiB) cartesian 1.277 ms (5 allocations: 159.77 KiB)
cartesian_simd 2.212 ms (5 allocations: 159.77 KiB) cartesian_simd 1.242 ms (5 allocations: 159.77 KiB)
cartesianrange 2.200 ms (5 allocations: 159.77 KiB) cartesianrange 1.283 ms (5 allocations: 159.77 KiB)
INFO: dim=2, sz=1000 INFO: dim=2, sz=1000
mapslices 12.855 s (13005016 allocations: 8.03 GiB) mapslices 9.179 s (13005016 allocations: 8.03 GiB)
functor 2.026 s (25 allocations: 30.58 MiB) functor 1.991 s (25 allocations: 30.58 MiB)
cartesian 1.760 s (10 allocations: 15.29 MiB) cartesian 1.477 s (10 allocations: 15.29 MiB)
cartesian_simd 1.915 s (10 allocations: 15.29 MiB) cartesian_simd 1.632 s (10 allocations: 15.29 MiB)
cartesianrange 1.716 s (10 allocations: 15.29 MiB) cartesianrange 1.449 s (10 allocations: 15.29 MiB)
INFO: dim=(1, 2), sz=10 INFO: dim=(1, 2), sz=10
mapslices 21.477 μs (172 allocations: 17.50 KiB) mapslices 19.838 μs (172 allocations: 17.50 KiB)
functor 5.953 μs (20 allocations: 1.17 KiB) functor 5.947 μs (20 allocations: 1.17 KiB)
cartesian 2.469 μs (3 allocations: 544 bytes) cartesian 3.662 μs (3 allocations: 544 bytes)
cartesian_simd 2.746 μs (3 allocations: 544 bytes) cartesian_simd 3.666 μs (3 allocations: 544 bytes)
cartesianrange 2.611 μs (3 allocations: 544 bytes) cartesianrange 3.633 μs (3 allocations: 544 bytes)
INFO: dim=(1, 2), sz=100 INFO: dim=(1, 2), sz=100
mapslices 3.584 ms (1264 allocations: 7.90 MiB) mapslices 3.539 ms (1264 allocations: 7.90 MiB)
functor 3.213 ms (20 allocations: 4.02 KiB) functor 3.268 ms (20 allocations: 4.02 KiB)
cartesian 1.390 ms (3 allocations: 1.97 KiB) cartesian 2.592 ms (3 allocations: 1.97 KiB)
cartesian_simd 1.387 ms (3 allocations: 1.97 KiB) cartesian_simd 2.593 ms (3 allocations: 1.97 KiB)
cartesianrange 1.293 ms (3 allocations: 1.97 KiB) cartesianrange 2.592 ms (3 allocations: 1.97 KiB)
INFO: dim=(1, 2), sz=1000 INFO: dim=(1, 2), sz=1000
mapslices 7.608 s (12555 allocations: 7.47 GiB) mapslices 6.978 s (12555 allocations: 7.47 GiB)
functor 4.163 s (20 allocations: 32.31 KiB) functor 3.982 s (20 allocations: 32.31 KiB)
cartesian 1.615 s (7 allocations: 16.08 KiB) cartesian 2.916 s (7 allocations: 16.08 KiB)
cartesian_simd 1.616 s (7 allocations: 16.08 KiB) cartesian_simd 2.854 s (7 allocations: 16.08 KiB)
cartesianrange 1.535 s (7 allocations: 16.08 KiB) cartesianrange 2.909 s (7 allocations: 16.08 KiB)```
strange that @simd
did not improve the Cartesian variant much, as it had a huge effect on the CartesianRange
variant.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
on a 0-day old master i get this timings: