Skip to content

Instantly share code, notes, and snippets.

@tkf
Last active January 12, 2018 06:13
Show Gist options
  • Save tkf/f7bc4a78ee071931e83c57f3d49e828b to your computer and use it in GitHub Desktop.
Save tkf/f7bc4a78ee071931e83c57f3d49e828b to your computer and use it in GitHub Desktop.
# コードは以下の URL より抜粋 & 改変:
# https://gist.github.com/genkuroki/799b4f2f5b081cfd4c7fca02fcffa23d
# (Revision: 66a60d5c8eafa420e3b5659e8b3181d7bbd71067)
# In [1]:
using BenchmarkTools
# RNG seed handed to `srand` before each benchmark input array is generated.
seed = 2018
# Colormap name; not referenced anywhere in the visible part of this file.
cmap = "CMRmap"
"""
    laplacian!(v, u, laplacian_local!)

Evaluate a discrete Laplacian over the whole plane by applying `laplacian_local!` at
every grid point of the last two dimensions of `u`.

For each grid point `(i, j)` the callback is invoked as
`laplacian_local!(v, u, m, n, i, j)`, where `m` and `n` are the sizes of the
second-to-last and last dimensions of `u`. The local computation is kept in a separate
function so that different local kernels can be plugged into the same traversal.

Grid points are visited with `i` in the inner loop and `j` in the outer loop, so that
consecutive calls touch neighbouring memory in Julia's column-major layout.

# Arguments
- `v`: forwarded unchanged to `laplacian_local!`.
- `u`: array with at least two dimensions.
- `laplacian_local!`: callable accepting `(v, u, m, n, i, j)`.

# Returns
`nothing`.

# Throws
- `ArgumentError` if `u` has fewer than two dimensions.
"""
function laplacian!(v, u, laplacian_local!)
    d = ndims(u)
    d ≥ 2 || throw(ArgumentError("u must have at least 2 dimensions, got $d"))
    # Query the two trailing extents directly instead of range-indexing the size tuple.
    m = size(u, d - 1)
    n = size(u, d)
    for j in 1:n
        for i in 1:m
            laplacian_local!(v, u, m, n, i, j)
        end
    end
    return nothing
end
"""
    test!(v, u, laplacian_local!; niters = 10)

Performance-test driver for a `laplacian_local!` kernel: run
`laplacian!(v, u, laplacian_local!)` once for every element of `1:niters`.

Returns `nothing`.
"""
function test!(v, u, laplacian_local!; niters = 10)
    sweeps = 1:niters
    for _sweep in sweeps
        laplacian!(v, u, laplacian_local!)
    end
    return nothing
end
# In [2]:
"""
    laplacian_atdot_inline!(v, u, m, n, i, j)

Write the discrete Laplacian of `u` at grid point `(i, j)` into `v[:, i, j]`:
the sum of the four neighbouring slices along the second and third dimensions of `u`
minus four times the slice at `(i, j)` itself.

Neighbour indices wrap around periodically: `i + 1` beyond `m` becomes `1`, `i - 1`
below `1` becomes `m`, and likewise for `j` with `n`.

The local Laplacian is split out into its own function like this on purpose. According
to the original author's note, combining `@inline` with a fused broadcast keeps memory
consumption low and makes the computation faster.
"""
@inline function laplacian_atdot_inline!(v, u, m, n, i, j)
    # Periodic neighbour indices along the second (i) and third (j) dimensions.
    i_up = ifelse(i + 1 ≤ m, i + 1, 1)
    i_dn = ifelse(i - 1 ≥ 1, i - 1, m)
    j_up = ifelse(j + 1 ≤ n, j + 1, 1)
    j_dn = ifelse(j - 1 ≥ 1, j - 1, n)
    # Single fused in-place broadcast over views of `u`.
    v[:, i, j] .= ((view(u, :, i_up, j) .+ view(u, :, i_dn, j)) .+
                   view(u, :, i, j_up)) .+ view(u, :, i, j_dn) .-
                  4 .* view(u, :, i, j)
end
# In [3]:
"""
    laplacian_atdot!(v, u, m, n, i, j)

Write the discrete Laplacian of `u` at grid point `(i, j)` into `v[:, i, j]`:
the sum of the four neighbouring slices along the second and third dimensions of `u`
minus four times the slice at `(i, j)` itself.

Neighbour indices wrap around periodically: `i + 1` beyond `m` becomes `1`, `i - 1`
below `1` becomes `m`, and likewise for `j` with `n`.

Same computation as `laplacian_atdot_inline!`, but without the `@inline` annotation.
"""
function laplacian_atdot!(v, u, m, n, i, j)
    # Periodic neighbour indices.
    inext = ifelse(i + 1 ≤ m, i + 1, 1)
    iprev = ifelse(i - 1 ≥ 1, i - 1, m)
    jnext = ifelse(j + 1 ≤ n, j + 1, 1)
    jprev = ifelse(j - 1 ≥ 1, j - 1, n)

    # Non-copying slices of the four neighbours and of the centre point.
    after_i = @view u[:, inext, j]
    before_i = @view u[:, iprev, j]
    after_j = @view u[:, i, jnext]
    before_j = @view u[:, i, jprev]
    centre = @view u[:, i, j]

    @. v[:, i, j] = after_i + before_i + after_j + before_j - 4 * centre
end
# --- Benchmarks ---
# Print version information and the relevant command-line options.
versioninfo()
@show Base.JLOptions().check_bounds # 0: unspecified; 1: yes; 2: no
@show Base.JLOptions().can_inline # 0: no; 1: yes
# Original benchmark: a 3 x n x n field with the default number of sweeps of `test!`.
n = 1000
srand(seed)
u = rand(3, n, n)
# `v` is only ever written by the kernels, so it can start out uninitialized.
v = Array{Float64,3}(3, n, n)
println()
println("** laplacian_atdot_inline! / n=$n **")
# `u` and `v` are untyped globals; interpolate them with `$` so that BenchmarkTools
# times the call itself rather than the cost of accessing global variables.
display(@benchmark test!($v, $u, laplacian_atdot_inline!))
println()
println()
println("** laplacian_atdot! / n=$n **")
display(@benchmark test!($v, $u, laplacian_atdot!))
println()
# Author's note: when the number of innermost `laplacian_local!` calls (~n^2) is reduced
# and the number of `laplacian!` calls (niters) is increased, memory usage grows and GC
# kicks in even for `laplacian_atdot_inline!`.
n = 10
srand(seed)
u = rand(3, n, n)
# `v` is only ever written by the kernels, so it can start out uninitialized.
v = Array{Float64,3}(3, n, n)
niters = 10000000
println()
println("** laplacian_atdot_inline! / n=$n, niters=$niters **")
# Pass `niters` through to `test!`; without it the call silently falls back to the
# default `niters = 10` and the printed label does not describe what was measured.
# Each evaluation now performs `niters` sweeps, so this section takes much longer to run.
# The globals are interpolated with `$` so that only the call itself is timed.
display(@benchmark test!($v, $u, laplacian_atdot_inline!; niters = $niters))
println()
println()
println("** laplacian_atdot! / n=$n, niters=$niters **")
display(@benchmark test!($v, $u, laplacian_atdot!; niters = $niters))
println()
Julia Version 0.6.2
Commit d386e40c17 (2017-12-13 18:08 UTC)
Platform Info:
OS: Linux (x86_64-pc-linux-gnu)
CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
Base.JLOptions().check_bounds = 0
Base.JLOptions().can_inline = 1
** laplacian_atdot_inline! / n=1000 **
BenchmarkTools.Trial:
memory estimate: 320 bytes
allocs estimate: 10
--------------
minimum time: 904.220 ms (0.00% GC)
median time: 915.531 ms (0.00% GC)
mean time: 913.809 ms (0.00% GC)
maximum time: 920.677 ms (0.00% GC)
--------------
samples: 6
evals/sample: 1
** laplacian_atdot! / n=1000 **
BenchmarkTools.Trial:
memory estimate: 2.98 GiB
allocs estimate: 50000010
--------------
minimum time: 1.487 s (5.73% GC)
median time: 1.495 s (5.75% GC)
mean time: 1.497 s (5.75% GC)
maximum time: 1.509 s (5.77% GC)
--------------
samples: 4
evals/sample: 1
** laplacian_atdot_inline! / n=10, niters=10000000 **
BenchmarkTools.Trial:
memory estimate: 320 bytes
allocs estimate: 10
--------------
minimum time: 48.545 μs (0.00% GC)
median time: 48.912 μs (0.00% GC)
mean time: 50.913 μs (0.00% GC)
maximum time: 121.087 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 1
** laplacian_atdot! / n=10, niters=10000000 **
BenchmarkTools.Trial:
memory estimate: 312.81 KiB
allocs estimate: 5010
--------------
minimum time: 95.745 μs (0.00% GC)
median time: 103.194 μs (0.00% GC)
mean time: 115.415 μs (7.61% GC)
maximum time: 1.261 ms (80.02% GC)
--------------
samples: 10000
evals/sample: 1
Julia Version 0.7.0-DEV.3354
Commit 9b5eed2b6c (2018-01-09 08:03 UTC)
Platform Info:
OS: Linux (x86_64-pc-linux-gnu)
CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
Environment:
(Base.JLOptions()).check_bounds = 0
(Base.JLOptions()).can_inline = 1
** laplacian_atdot_inline! / n=1000 **
BenchmarkTools.Trial:
memory estimate: 320 bytes
allocs estimate: 10
--------------
minimum time: 358.989 ms (0.00% GC)
median time: 367.559 ms (0.00% GC)
mean time: 367.145 ms (0.00% GC)
maximum time: 375.536 ms (0.00% GC)
--------------
samples: 14
evals/sample: 1
** laplacian_atdot! / n=1000 **
BenchmarkTools.Trial:
memory estimate: 610.35 MiB
allocs estimate: 10000010
--------------
minimum time: 698.321 ms (2.14% GC)
median time: 785.037 ms (2.25% GC)
mean time: 789.565 ms (2.18% GC)
maximum time: 862.514 ms (2.13% GC)
--------------
samples: 7
evals/sample: 1
** laplacian_atdot_inline! / n=10, niters=10000000 **
BenchmarkTools.Trial:
memory estimate: 320 bytes
allocs estimate: 10
--------------
minimum time: 17.739 μs (0.00% GC)
median time: 17.792 μs (0.00% GC)
mean time: 18.224 μs (0.00% GC)
maximum time: 46.860 μs (0.00% GC)
--------------
samples: 10000
evals/sample: 1
** laplacian_atdot! / n=10, niters=10000000 **
BenchmarkTools.Trial:
memory estimate: 62.81 KiB
allocs estimate: 1010
--------------
minimum time: 31.741 μs (0.00% GC)
median time: 32.416 μs (0.00% GC)
mean time: 41.096 μs (16.72% GC)
maximum time: 41.179 ms (99.88% GC)
--------------
samples: 10000
evals/sample: 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment