Last active
January 12, 2018 06:13
-
-
Save tkf/f7bc4a78ee071931e83c57f3d49e828b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code excerpted and adapted from:
#   https://gist.github.com/genkuroki/799b4f2f5b081cfd4c7fca02fcffa23d
#   (Revision: 66a60d5c8eafa420e3b5659e8b3181d7bbd71067)

# In [1]:

using BenchmarkTools

# Seed handed to the random number generator before each benchmark input is drawn.
seed = 2018

# NOTE(review): `cmap` is not referenced anywhere in the visible code; presumably
# a leftover from plotting cells of the source notebook. Kept in case code outside
# this excerpt relies on it.
cmap = "CMRmap"
# Evaluate the discrete Laplacian over the whole grid.
#
# `laplacian_local!(v, u, m, n, i, j)` computes the discrete Laplacian at the
# single grid point (i, j). The local computation is deliberately kept in a
# separate function so that different kernels can be plugged in.
#
# Arguments:
#   v                 - output array; forwarded to the kernel untouched
#   u                 - input array; its last two dimensions form the m-by-n grid
#   laplacian_local!  - kernel, called exactly once per grid point
#
# Returns `nothing`.
#
# NOTE(review): grid points are visited with `i` varying fastest. This assumes the
# kernel's work at one point does not depend on other points having been processed
# first (e.g. `v` and `u` do not alias) -- confirm for any new kernel.
function laplacian!(v, u, laplacian_local!)
    # Extents of the two trailing dimensions, queried directly instead of
    # slicing the size tuple.
    nd = ndims(u)
    m = size(u, nd - 1)
    n = size(u, nd)
    # `i` indexes the earlier of the two grid dimensions, so it is the inner
    # loop: Julia arrays are column-major and this walks memory in storage order.
    for j in 1:n
        for i in 1:m
            laplacian_local!(v, u, m, n, i, j)
        end
    end
    return nothing
end
# Performance-test driver for a `laplacian_local!` kernel.
#
# Runs `laplacian!(v, u, laplacian_local!)` `niters` times (default: 10) and
# returns `nothing`.
#
# The kernel is only forwarded here, never called directly. The type parameter
# `F` forces this method to be specialized on the kernel's type; without it
# Julia may skip specialization for a function argument that is merely passed
# through, turning each `laplacian!` call into a dynamic dispatch.
function test!(v, u, laplacian_local!::F; niters = 10) where {F}
    for _ in 1:niters
        laplacian!(v, u, laplacian_local!)
    end
    return nothing
end
# In [2]:

# Local kernel: discrete Laplacian at the single grid point (i, j).
#
# Writes into `v[:, i, j]` the sum of the four nearest neighbours of
# `u[:, i, j]` minus four times `u[:, i, j]`. Neighbour indices wrap around
# (`i+1 > m` maps to 1, `i-1 < 1` maps to `m`; likewise for `j` with `n`),
# i.e. the grid is treated as periodic.
#
# Keeping the local computation in its own function is the pattern used by
# `laplacian!`. Per the original author's note, combining `@inline` with `@.`
# lowers memory consumption and speeds up the computation.
#
# The function's value is the value of the broadcast assignment (no explicit
# `return`); `laplacian!` ignores it.
@inline function laplacian_atdot_inline!(v, u, m, n, i, j)
    # `@.` dots every call in the expression and turns `=` into `.=`, so the
    # right-hand side is written element-wise into `v[:, i, j]`.
    @.(
        v[:, i, j] =
            @view(u[:, ifelse(i+1 ≤ m, i+1, 1), j]) +
            @view(u[:, ifelse(i-1 ≥ 1, i-1, m), j]) +
            @view(u[:, i, ifelse(j+1 ≤ n, j+1, 1)]) +
            @view(u[:, i, ifelse(j-1 ≥ 1, j-1, n)]) -
            4*@view(u[:, i, j])
    )
end
# In [3]:

# Local kernel: discrete Laplacian at the single grid point (i, j), without an
# `@inline` annotation.
#
# Writes into `v[:, i, j]` the sum of the four nearest neighbours of
# `u[:, i, j]` minus four times `u[:, i, j]`. Neighbour indices wrap around
# (`i+1 > m` maps to 1, `i-1 < 1` maps to `m`; likewise for `j` with `n`),
# i.e. the grid is treated as periodic.
#
# Computes the same stencil as `laplacian_atdot_inline!`; the benchmark script
# times the two against each other, so the body is kept in the same `@.` form.
#
# The function's value is the value of the broadcast assignment (no explicit
# `return`); `laplacian!` ignores it.
function laplacian_atdot!(v, u, m, n, i, j)
    # `@.` dots every call in the expression and turns `=` into `.=`, so the
    # right-hand side is written element-wise into `v[:, i, j]`.
    @.(
        v[:, i, j] =
            @view(u[:, ifelse(i+1 ≤ m, i+1, 1), j]) +
            @view(u[:, ifelse(i-1 ≥ 1, i-1, m), j]) +
            @view(u[:, i, ifelse(j+1 ≤ n, j+1, 1)]) +
            @view(u[:, i, ifelse(j-1 ≥ 1, j-1, n)]) -
            4*@view(u[:, i, j])
    )
end
# --- Benchmarks ---

# Show version information and the relevant command-line options.
versioninfo()
@show Base.JLOptions().check_bounds  # 0: unspecified; 1: yes; 2: no
@show Base.JLOptions().can_inline    # 0: no; 1: yes

# Original benchmark: large grid, default number of sweeps in `test!`.
n = 1000
# NOTE(review): `srand` is the seeding call of the Julia versions this script was
# recorded with; newer Julia releases presumably need `Random.seed!` instead --
# confirm before running there.
srand(seed)
u = rand(3, n, n)
v = similar(u)  # uninitialised output buffer with the element type and shape of `u`

# `u` and `v` are non-constant globals, so they are interpolated with `$` to keep
# global-variable access out of the measurement.
println()
println("** laplacian_atdot_inline! / n=$n **")
display(@benchmark test!($v, $u, laplacian_atdot_inline!))
println()

println()
println("** laplacian_atdot! / n=$n **")
display(@benchmark test!($v, $u, laplacian_atdot!))
println()

# Hypothesis being checked: when the number of `laplacian_local!` calls made in
# the innermost loop (~n^2) is reduced and the number of `laplacian!` calls
# (`niters`) is increased, memory usage grows and GC kicks in even for
# `laplacian_atdot_inline!`.
n = 10
srand(seed)
u = rand(3, n, n)
v = similar(u)
niters = 10000000

# `niters` must be passed explicitly; otherwise `test!` silently falls back to
# its default of 10 and the label printed below does not describe what was
# measured. Each evaluation now performs niters * n^2 = 10^9 kernel calls, so
# expect these two benchmarks to run for a long time.
println()
println("** laplacian_atdot_inline! / n=$n, niters=$niters **")
display(@benchmark test!($v, $u, laplacian_atdot_inline!; niters = $niters))
println()

println()
println("** laplacian_atdot! / n=$n, niters=$niters **")
display(@benchmark test!($v, $u, laplacian_atdot!; niters = $niters))
println()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Julia Version 0.6.2 | |
Commit d386e40c17 (2017-12-13 18:08 UTC) | |
Platform Info: | |
OS: Linux (x86_64-pc-linux-gnu) | |
CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.9.1 (ORCJIT, haswell) | |
Base.JLOptions().check_bounds = 0 | |
Base.JLOptions().can_inline = 1 | |
** laplacian_atdot_inline! / n=1000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 320 bytes | |
allocs estimate: 10 | |
-------------- | |
minimum time: 904.220 ms (0.00% GC) | |
median time: 915.531 ms (0.00% GC) | |
mean time: 913.809 ms (0.00% GC) | |
maximum time: 920.677 ms (0.00% GC) | |
-------------- | |
samples: 6 | |
evals/sample: 1 | |
** laplacian_atdot! / n=1000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 2.98 GiB | |
allocs estimate: 50000010 | |
-------------- | |
minimum time: 1.487 s (5.73% GC) | |
median time: 1.495 s (5.75% GC) | |
mean time: 1.497 s (5.75% GC) | |
maximum time: 1.509 s (5.77% GC) | |
-------------- | |
samples: 4 | |
evals/sample: 1 | |
** laplacian_atdot_inline! / n=10, niters=10000000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 320 bytes | |
allocs estimate: 10 | |
-------------- | |
minimum time: 48.545 μs (0.00% GC) | |
median time: 48.912 μs (0.00% GC) | |
mean time: 50.913 μs (0.00% GC) | |
maximum time: 121.087 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 | |
** laplacian_atdot! / n=10, niters=10000000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 312.81 KiB | |
allocs estimate: 5010 | |
-------------- | |
minimum time: 95.745 μs (0.00% GC) | |
median time: 103.194 μs (0.00% GC) | |
mean time: 115.415 μs (7.61% GC) | |
maximum time: 1.261 ms (80.02% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Julia Version 0.7.0-DEV.3354 | |
Commit 9b5eed2b6c (2018-01-09 08:03 UTC) | |
Platform Info: | |
OS: Linux (x86_64-pc-linux-gnu) | |
CPU: Intel(R) Core(TM) i7-4500U CPU @ 1.80GHz | |
WORD_SIZE: 64 | |
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell) | |
LAPACK: libopenblas64_ | |
LIBM: libopenlibm | |
LLVM: libLLVM-3.9.1 (ORCJIT, haswell) | |
Environment: | |
(Base.JLOptions()).check_bounds = 0 | |
(Base.JLOptions()).can_inline = 1 | |
** laplacian_atdot_inline! / n=1000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 320 bytes | |
allocs estimate: 10 | |
-------------- | |
minimum time: 358.989 ms (0.00% GC) | |
median time: 367.559 ms (0.00% GC) | |
mean time: 367.145 ms (0.00% GC) | |
maximum time: 375.536 ms (0.00% GC) | |
-------------- | |
samples: 14 | |
evals/sample: 1 | |
** laplacian_atdot! / n=1000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 610.35 MiB | |
allocs estimate: 10000010 | |
-------------- | |
minimum time: 698.321 ms (2.14% GC) | |
median time: 785.037 ms (2.25% GC) | |
mean time: 789.565 ms (2.18% GC) | |
maximum time: 862.514 ms (2.13% GC) | |
-------------- | |
samples: 7 | |
evals/sample: 1 | |
** laplacian_atdot_inline! / n=10, niters=10000000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 320 bytes | |
allocs estimate: 10 | |
-------------- | |
minimum time: 17.739 μs (0.00% GC) | |
median time: 17.792 μs (0.00% GC) | |
mean time: 18.224 μs (0.00% GC) | |
maximum time: 46.860 μs (0.00% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 | |
** laplacian_atdot! / n=10, niters=10000000 ** | |
BenchmarkTools.Trial: | |
memory estimate: 62.81 KiB | |
allocs estimate: 1010 | |
-------------- | |
minimum time: 31.741 μs (0.00% GC) | |
median time: 32.416 μs (0.00% GC) | |
mean time: 41.096 μs (16.72% GC) | |
maximum time: 41.179 ms (99.88% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment