Created
October 6, 2018 17:35
-
-
Save haampie/e0d7051bc67aeda03661ef429cf1f4a0 to your computer and use it in GitHub Desktop.
spmvbench.jl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This is the implementation currently in SparseArrays | |
""" | |
function simple_mul!(y, A, x) | |
@inbounds for i = Base.OneTo(A.n) | |
xi = x[i] | |
for j = A.colptr[i] : A.colptr[i + 1] - 1 | |
y[A.rowval[j]] += A.nzval[j] * xi | |
end | |
end | |
return y | |
end | |
""" | |
Same implementation as in base, but now with FMA -- barely any difference | |
""" | |
function muladd_mul!(y, A, x) | |
@inbounds for i = Base.OneTo(A.n) | |
xi = x[i] | |
for j = A.colptr[i] : A.colptr[i + 1] - 1 | |
y[A.rowval[j]] = muladd(A.nzval[j], xi, y[A.rowval[j]]) | |
end | |
end | |
return y | |
end | |
""" | |
This version does not construct a range for the inner loop, which can be a slight | |
optimization when the number of nonzeros per column is very small | |
""" | |
function optimizedinnerloop_mul!(y, A, x) | |
j = 1 | |
@inbounds for i = Base.OneTo(A.n) | |
xi = x[i] | |
last_idx = A.colptr[i+1] | |
while j != last_idx | |
row = A.rowval[j] | |
y[row] = muladd(A.nzval[j], xi, y[row]) | |
j += 1 | |
end | |
end | |
return y | |
end | |
""" | |
This version unrolls the inner loop to chunks of size N. This reduces the work of | |
bookkeeping a bit and should give an edge when the number of nonzeros per column is | |
larger. | |
""" | |
@generated unrolled_mul!(y, A, x, sz::Val{N}) where {N} = unrolled_mul_impl!(N) | |
function unrolled_mul_impl!(N::Int) | |
exprs = ntuple(i -> :(y[A.rowval[j+$(i-1)]] = muladd(A.nzval[j+$(i-1)], xi, y[A.rowval[j+$(i-1)]])), Val(N)) | |
return quote | |
j = 1 | |
@inbounds for i = Base.OneTo(A.n) | |
xi = x[i] | |
last_idx = A.colptr[i+1] | |
# N simultaneously | |
while j + $N ≤ last_idx | |
$(exprs...) | |
j += $N | |
end | |
# Fix the remainder | |
while j != last_idx | |
y[A.rowval[j]] = muladd(A.nzval[j], xi, y[A.rowval[j]]) | |
j += 1 | |
end | |
end | |
return y | |
end | |
end | |
## Testing | |
using Test, SparseArrays, LinearAlgebra | |
function test_things() | |
A = sprand(100, 100, 0.3) | |
x = rand(100) | |
y1 = rand(100) | |
y2 = copy(y1) | |
y3 = copy(y1) | |
y4 = copy(y1) | |
y5 = copy(y1) | |
simple_mul!(y1, A, x) | |
muladd_mul!(y2, A, x) | |
optimizedinnerloop_mul!(y3, A, x) | |
unrolled_mul!(y4, A, x, Val{2}()) | |
unrolled_mul!(y5, A, x, Val{4}()) | |
@test y1 ≈ y2 ≈ y3 ≈ y4 ≈ y5 | |
end | |
## benching | |
using BenchmarkTools | |
function bench_increasing_bandwidth(n = 1000, ks = 0:10, ::Type{T} = Float64) where {T} | |
results = ntuple(i -> Float64[], 4) | |
for k = ks | |
A = spdiagm((i => rand(T, n - abs(i)) for i = -k:k)...) | |
n = size(A, 1) | |
x = rand(T, n) | |
y = rand(T, n) | |
flop = 2nnz(A) | |
@info "Benchmarking" k flop | |
push!(results[1], flop / @belapsed(simple_mul!($y, $A, $x)) / 1e9) | |
push!(results[2], flop / @belapsed(optimizedinnerloop_mul!($y, $A, $x)) / 1e9) | |
push!(results[3], flop / @belapsed(unrolled_mul!($y, $A, $x, $(Val{2}()))) / 1e9) | |
push!(results[4], flop / @belapsed(unrolled_mul!($y, $A, $x, $(Val{4}()))) / 1e9) | |
@show last.(results) | |
end | |
results | |
end | |
function bench_increasing_density(n = 1000, ks = 0:10, ::Type{T} = Float64) where {T} | |
results = ntuple(i -> Float64[], 4) | |
for k = ks | |
A = sprand(n, n, (2k+1) / n) | |
n = size(A, 1) | |
x = rand(T, n) | |
y = rand(T, n) | |
flop = 2nnz(A) | |
@info "Benchmarking" k flop | |
push!(results[1], flop / @belapsed(simple_mul!($y, $A, $x)) / 1e9) | |
push!(results[2], flop / @belapsed(optimizedinnerloop_mul!($y, $A, $x)) / 1e9) | |
push!(results[3], flop / @belapsed(unrolled_mul!($y, $A, $x, $(Val{2}()))) / 1e9) | |
push!(results[4], flop / @belapsed(unrolled_mul!($y, $A, $x, $(Val{4}()))) / 1e9) | |
@show last.(results) | |
end | |
results | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment