Last active
August 29, 2015 14:05
-
-
Save floswald/9e79f6f51c276becbd74 to your computer and use it in GitHub Desktop.
integration loops
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# original version: slow | |
# | |
function integrateVbar!(ia::Int,ih::Int,ij::Int,age::Int,Gz::Array{Float64,3},Gyp::Array{Float64,3},Gs::Array{Float64,2},Gtau::Array{Float64,1},m::Model,p::Param) | |
# loop over conditioning states | |
for itau = 1:p.ntau # current tau | |
for ip = 1:p.np # current p | |
for iy = 1:p.ny # current y | |
for iz = 1:p.nz # current z | |
for is = 1:p.ns # current HHsize | |
# set value | |
tmp = 0.0 | |
# compute current index | |
# idx9 returns the linear indices of the 9D-arrays m.EV or m.vbar | |
# (see below) | |
idx0 = idx9(is,iz,iy,ip,itau,ia,ih,ij,age,p) | |
# loop over future states | |
for itau1 = 1:p.ntau # future tau | |
for ip1 = 1:p.np # future p | |
for iy1 = 1:p.ny # future y | |
for iz1 = 1:p.nz # future z | |
for is1 = 1:p.ns # future HHsize | |
# compute future index | |
idx1 = idx9(is1,iz1,iy1,ip1,itau1,ia,ih,ij,age,p) | |
# construct sum | |
@inbounds tmp += m.vbar[idx1] * Gz[iz + p.nz * (iz1 + p.nz * (ij-1)-1)] * Gyp[iy + p.ny * ((ip-1) + p.np * ((iy1-1) + p.ny * ((ip1-1) + p.np * (ij-1)))) ] * Gs[is + p.ns * (is1-1)] * Gtau[itau1] | |
end | |
end | |
end | |
end | |
end | |
# assign to current index | |
m.EV[idx0] = tmp | |
end | |
end | |
end | |
end | |
end | |
return nothing | |
end | |
function idx9(is::Int,iz::Int,iy::Int,ip::Int,itau::Int,ia::Int,ih::Int,ij::Int,age::Int,p::Param) | |
r = is + p.ns * (iz + p.nz * (iy + p.ny * (ip + p.np * (itau + p.ntau * (ia + p.na * (ih + p.nh * (ij + p.nJ * (age-1)-1)-1)-1)-1)-1)-1)-1) | |
return r | |
end | |
# new version | |
# 2 secs faster | |
function integrateVbar!(ia::Int,ih::Int,ij::Int,age::Int,Gz::Array{Float64,3},Gyp::Array{Float64,3},Gs::Array{Float64,2},Gtau::Array{Float64,1},m::Model,p::Param) | |
# offsets | |
o_tau = 0 | |
o_p = 0 | |
o_y = 0 | |
o_z = 0 | |
o_s = 0 | |
# factors | |
f_tau = 0.0 | |
f_py = 0.0 | |
f_z = 0.0 | |
f_s = 0.0 | |
# loop over conditioning states | |
for itau = 1:p.ntau # current tau | |
for ip = 1:p.np # current p | |
for iy = 1:p.ny # current y | |
for iz = 1:p.nz # current z | |
for is = 1:p.ns # current HHsize | |
# set value | |
tmp = 0.0 | |
# compute current index | |
idx0 = idx9(is,iz,iy,ip,itau,ia,ih,ij,age,p) | |
# loop over future states | |
for itau1 = 1:p.ntau # future tau | |
idx1 = idx_tau(itau,ia,ih,ij,age,p) | |
o_tau = (idx1 + (itau1-1)) * p.np | |
f_tau = Gtau[itau1] | |
for ip1 = 1:p.np | |
o_p = (o_tau + (ip1-1)) * p.ny | |
for iy1 = 1:p.ny # future y | |
o_y = (o_p + (iy1-1)) * p.nz | |
f_py = f_tau * Gyp[iy + p.ny * ((ip-1) + p.np * ((iy1-1) + p.ny * ((ip1-1) + p.np * (ij-1)))) ] | |
for iz1 = 1:p.nz # future z | |
o_z = (o_y + (iz1-1)) * p.ns | |
f_z = f_py * Gz[iz + p.nz * (iz1 + p.nz * (ij-1)-1)] | |
for is1 = 1:p.ns # future HHsize | |
# construct sum | |
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)] * f_z | |
end | |
end | |
end | |
end | |
end | |
# assign to current index | |
m.EV[idx0] = tmp | |
end | |
end | |
end | |
end | |
end | |
return nothing | |
end | |
i see! should have thought of that! doing this reduces the time in this function by a factor of 4.5!
i have a couple of places like that. cheers!
awesome!
This is something I want simple tensor to do automatically, but it is a bit tough :-)
actually, I was thinking about changing
for is1 = 1:p.ns # future HHsize
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)] * f_z
end
to
for is1 = 1:p.ns # future HHsize
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)]
end
tmp *= f_z
this way you also save a lot of multiplies, but what you did was already saving a bunch.
I would also move the @inbound completely outside the loop. Let me send you sg that worked well for me.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
the multiplies are probably the most expansive things, so one thing that can speed up is to factor out the multiplications that you can.
For instance, here you multiply by Gtau[itau1] within ip1, iy1, iz1,is1 but you could construct the rest of the expression first, and then comping out of the loop at line 34, just multiply that number by Gtau[itau1]. This can reduce significantly the number of multiplies. I do this things in LLMR and speeds it up.
Same thing with the construction of the index. You could make it a macro so that it is replaced at compile time. You should factor some of the multiplications to avoid doing them in the deepest part of the loop.
Finally you could try the SIMD thing, it will work on the HPC since they intel processors.