Created
March 26, 2021 22:03
-
-
Save matthieubulte/497a82757ce781178d2121b326590260 to your computer and use it in GitHub Desktop.
Compiling sparse matrix multiplication
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using LinearAlgebra | |
using BenchmarkTools | |
using SparseArrays | |
# To exploit the sparsity of the matrix as much as possible, pre-compile the multiplication operation | |
# to skip the matrix entries look-up. Since the matrix is sparse this might generate only a few operations | |
# which are all inlined. | |
# Note that this only works with small matrices since otherwise the size of the function will just be to | |
# large. | |
# Note that this could also be avoided, but this is another topic. | |
function compile_lmul!(A::SparseArrays.AbstractSparseMatrixCSC) | |
nzv = nonzeros(A) | |
rv = rowvals(A) | |
statements = [] | |
for k in 1:size(A, 1) | |
for col in 1:size(A, 2) | |
for j in nzrange(A, col) | |
push!(statements, | |
:(@inbounds C[$(rv[j]), $(k)] += $(nzv[j])*B[$(col),$(k)]) | |
) | |
end | |
end | |
end | |
ex = Expr(:block) | |
ex.args = statements | |
eval(:( | |
function(C, B) | |
$(ex) | |
C | |
end | |
)) | |
end | |
# Source of the model https://www.ronanarraes.com/2019/04/my-julia-workflow-readability-vs-performance/ | |
const O3x3 = zeros(3,3) | |
const O3x12 = zeros(3,12) | |
const O12x18 = zeros(12,18) | |
I6 = Matrix{Float64}(I,6,6) | |
I18 = Matrix{Float64}(I,18,18) | |
λ = 1/100 | |
wx = Float64[ 0 -1 0; | |
1 0 0; | |
0 0 0;] | |
Ak_1 = [ wx -I O3x12; | |
O3x3 λ*I O3x12; | |
O12x18 ;] | |
Fk_1 = exp(Ak_1); | |
mul_sFk_1! = compile_lmul!(sparse(sFk_1)); | |
aux1 = zeros(18,18); | |
Pu = Matrix{Float64}(I,18,18) | |
@btime mul!(aux1, Fk_1, Pu); | |
# 619.859 ns (0 allocations: 0 bytes) | |
@btime mul!(aux1, sFk_1, Pu, 1, 1); | |
# 692.493 ns (0 allocations: 0 bytes) | |
@btime mul_sFk_1!(aux1, Pu); | |
# 157.352 ns (0 allocations: 0 bytes) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment