Last active
August 29, 2015 14:05
-
-
Save floswald/9e79f6f51c276becbd74 to your computer and use it in GitHub Desktop.
integration loops
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# original version: slow | |
# | |
function integrateVbar!(ia::Int,ih::Int,ij::Int,age::Int,Gz::Array{Float64,3},Gyp::Array{Float64,3},Gs::Array{Float64,2},Gtau::Array{Float64,1},m::Model,p::Param) | |
# loop over conditioning states | |
for itau = 1:p.ntau # current tau | |
for ip = 1:p.np # current p | |
for iy = 1:p.ny # current y | |
for iz = 1:p.nz # current z | |
for is = 1:p.ns # current HHsize | |
# set value | |
tmp = 0.0 | |
# compute current index | |
# idx9 returns the linear indices of the 9D-arrays m.EV or m.vbar | |
# (see below) | |
idx0 = idx9(is,iz,iy,ip,itau,ia,ih,ij,age,p) | |
# loop over future states | |
for itau1 = 1:p.ntau # future tau | |
for ip1 = 1:p.np # future p | |
for iy1 = 1:p.ny # future y | |
for iz1 = 1:p.nz # future z | |
for is1 = 1:p.ns # future HHsize | |
# compute future index | |
idx1 = idx9(is1,iz1,iy1,ip1,itau1,ia,ih,ij,age,p) | |
# construct sum | |
@inbounds tmp += m.vbar[idx1] * Gz[iz + p.nz * (iz1 + p.nz * (ij-1)-1)] * Gyp[iy + p.ny * ((ip-1) + p.np * ((iy1-1) + p.ny * ((ip1-1) + p.np * (ij-1)))) ] * Gs[is + p.ns * (is1-1)] * Gtau[itau1] | |
end | |
end | |
end | |
end | |
end | |
# assign to current index | |
m.EV[idx0] = tmp | |
end | |
end | |
end | |
end | |
end | |
return nothing | |
end | |
function idx9(is::Int,iz::Int,iy::Int,ip::Int,itau::Int,ia::Int,ih::Int,ij::Int,age::Int,p::Param) | |
r = is + p.ns * (iz + p.nz * (iy + p.ny * (ip + p.np * (itau + p.ntau * (ia + p.na * (ih + p.nh * (ij + p.nJ * (age-1)-1)-1)-1)-1)-1)-1)-1) | |
return r | |
end | |
# new version | |
# 2 secs faster | |
function integrateVbar!(ia::Int,ih::Int,ij::Int,age::Int,Gz::Array{Float64,3},Gyp::Array{Float64,3},Gs::Array{Float64,2},Gtau::Array{Float64,1},m::Model,p::Param) | |
# offsets | |
o_tau = 0 | |
o_p = 0 | |
o_y = 0 | |
o_z = 0 | |
o_s = 0 | |
# factors | |
f_tau = 0.0 | |
f_py = 0.0 | |
f_z = 0.0 | |
f_s = 0.0 | |
# loop over conditioning states | |
for itau = 1:p.ntau # current tau | |
for ip = 1:p.np # current p | |
for iy = 1:p.ny # current y | |
for iz = 1:p.nz # current z | |
for is = 1:p.ns # current HHsize | |
# set value | |
tmp = 0.0 | |
# compute current index | |
idx0 = idx9(is,iz,iy,ip,itau,ia,ih,ij,age,p) | |
# loop over future states | |
for itau1 = 1:p.ntau # future tau | |
idx1 = idx_tau(itau,ia,ih,ij,age,p) | |
o_tau = (idx1 + (itau1-1)) * p.np | |
f_tau = Gtau[itau1] | |
for ip1 = 1:p.np | |
o_p = (o_tau + (ip1-1)) * p.ny | |
for iy1 = 1:p.ny # future y | |
o_y = (o_p + (iy1-1)) * p.nz | |
f_py = f_tau * Gyp[iy + p.ny * ((ip-1) + p.np * ((iy1-1) + p.ny * ((ip1-1) + p.np * (ij-1)))) ] | |
for iz1 = 1:p.nz # future z | |
o_z = (o_y + (iz1-1)) * p.ns | |
f_z = f_py * Gz[iz + p.nz * (iz1 + p.nz * (ij-1)-1)] | |
for is1 = 1:p.ns # future HHsize | |
# construct sum | |
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)] * f_z | |
end | |
end | |
end | |
end | |
end | |
# assign to current index | |
m.EV[idx0] = tmp | |
end | |
end | |
end | |
end | |
end | |
return nothing | |
end | |
awesome!
This is something I want simple tensor to do automatically, but it is a bit tough :-)
actually, I was thinking about changing
for is1 = 1:p.ns # future HHsize
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)] * f_z
end
to
for is1 = 1:p.ns # future HHsize
@inbounds tmp += m.vbar[o_z + is1] * Gs[is + p.ns * (is1-1)]
end
tmp *= f_z
this way you also save a lot of multiplies, but what you did was already saving a bunch.
I would also move the @inbound completely outside the loop. Let me send you sg that worked well for me.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
i see! should have thought of that! doing this reduces the time in this function by a factor of 4.5!
i have a couple of places like that. cheers!