Last active
August 29, 2015 14:05
-
-
Save AlexKordic/99113326daeeb33b584a to your computer and use it in GitHub Desktop.
300,000 4x4 matrix multiply inversions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local ffi = require("ffi") | |
local new = ffi.new | |
--[[ | |
All numeric calculations are performed with doubles. Using | |
floats for storage saves memory (for big arrays). But arithmetic | |
is usually slower due to the required float<->double conversions. | |
]] | |
ffi.cdef([[ | |
struct Matrix { | |
double entries[16]; | |
}; | |
]]) | |
local Matrix | |
local MatrixStructure = ffi.typeof("struct Matrix") | |
local MatrixMetatable = {} | |
-- MatrixMetatable.__new = function (_ctype) | |
-- local ret = new(_ctype) | |
-- for i=0, 15 do ret.entries[i] = 0 end | |
-- return ret | |
-- end | |
local function _new_matrix() | |
return Matrix() | |
-- return ffi.new(MatrixStructure) | |
-- return ffi.new("struct Matrix") | |
end | |
MatrixMetatable.__index = {} | |
function MatrixMetatable.__index.identity(self) | |
for i=0, 15, 5 do self.entries[i] = 1 end | |
end | |
-- function MatrixMetatable.__mul(self, m) | |
function MatrixMetatable.__index.mul(self, m) | |
local ret = _new_matrix() | |
local me, se = m.entries, self.entries | |
for i=0, 12, 4 do | |
for j=0, 3 do | |
ret.entries[i+j] = me[j] * se[i] + me[j+4] * se[i+1] + me[j+8] * se[i+2] + me[j+12] * se[i+3] | |
end | |
end | |
return ret | |
end | |
function MatrixMetatable.__index.clone(self) | |
local ret = _new_matrix() | |
for i=0, 15 do ret.entries[i] = self.entries[i] end | |
return ret | |
end | |
function MatrixMetatable.__index.inverse(self) | |
local ret = _new_matrix() | |
local e = self.entries | |
local rete = ret.entries | |
rete[0] = e[5] * e[10] * e[15] - | |
e[5] * e[11] * e[14] - | |
e[9] * e[6] * e[15] + | |
e[9] * e[7] * e[14] + | |
e[13] * e[6] * e[11] - | |
e[13] * e[7] * e[10] | |
rete[4] = -e[4] * e[10] * e[15] + | |
e[4] * e[11] * e[14] + | |
e[8] * e[6] * e[15] - | |
e[8] * e[7] * e[14] - | |
e[12] * e[6] * e[11] + | |
e[12] * e[7] * e[10] | |
rete[8] = e[4] * e[9] * e[15] - | |
e[4] * e[11] * e[13] - | |
e[8] * e[5] * e[15] + | |
e[8] * e[7] * e[13] + | |
e[12] * e[5] * e[11] - | |
e[12] * e[7] * e[9] | |
rete[12] = -e[4] * e[9] * e[14] + | |
e[4] * e[10] * e[13] + | |
e[8] * e[5] * e[14] - | |
e[8] * e[6] * e[13] - | |
e[12] * e[5] * e[10] + | |
e[12] * e[6] * e[9] | |
rete[1] = -e[1] * e[10] * e[15] + | |
e[1] * e[11] * e[14] + | |
e[9] * e[2] * e[15] - | |
e[9] * e[3] * e[14] - | |
e[13] * e[2] * e[11] + | |
e[13] * e[3] * e[10] | |
rete[5] = e[0] * e[10] * e[15] - | |
e[0] * e[11] * e[14] - | |
e[8] * e[2] * e[15] + | |
e[8] * e[3] * e[14] + | |
e[12] * e[2] * e[11] - | |
e[12] * e[3] * e[10] | |
rete[9] = -e[0] * e[9] * e[15] + | |
e[0] * e[11] * e[13] + | |
e[8] * e[1] * e[15] - | |
e[8] * e[3] * e[13] - | |
e[12] * e[1] * e[11] + | |
e[12] * e[3] * e[9] | |
rete[13] = e[0] * e[9] * e[14] - | |
e[0] * e[10] * e[13] - | |
e[8] * e[1] * e[14] + | |
e[8] * e[2] * e[13] + | |
e[12] * e[1] * e[10] - | |
e[12] * e[2] * e[9] | |
rete[2] = e[1] * e[6] * e[15] - | |
e[1] * e[7] * e[14] - | |
e[5] * e[2] * e[15] + | |
e[5] * e[3] * e[14] + | |
e[13] * e[2] * e[7] - | |
e[13] * e[3] * e[6] | |
rete[6] = -e[0] * e[6] * e[15] + | |
e[0] * e[7] * e[14] + | |
e[4] * e[2] * e[15] - | |
e[4] * e[3] * e[14] - | |
e[12] * e[2] * e[7] + | |
e[12] * e[3] * e[6] | |
rete[10] = e[0] * e[5] * e[15] - | |
e[0] * e[7] * e[13] - | |
e[4] * e[1] * e[15] + | |
e[4] * e[3] * e[13] + | |
e[12] * e[1] * e[7] - | |
e[12] * e[3] * e[5] | |
rete[14] = -e[0] * e[5] * e[14] + | |
e[0] * e[6] * e[13] + | |
e[4] * e[1] * e[14] - | |
e[4] * e[2] * e[13] - | |
e[12] * e[1] * e[6] + | |
e[12] * e[2] * e[5] | |
rete[3] = -e[1] * e[6] * e[11] + | |
e[1] * e[7] * e[10] + | |
e[5] * e[2] * e[11] - | |
e[5] * e[3] * e[10] - | |
e[9] * e[2] * e[7] + | |
e[9] * e[3] * e[6] | |
rete[7] = e[0] * e[6] * e[11] - | |
e[0] * e[7] * e[10] - | |
e[4] * e[2] * e[11] + | |
e[4] * e[3] * e[10] + | |
e[8] * e[2] * e[7] - | |
e[8] * e[3] * e[6] | |
rete[11] = -e[0] * e[5] * e[11] + | |
e[0] * e[7] * e[9] + | |
e[4] * e[1] * e[11] - | |
e[4] * e[3] * e[9] - | |
e[8] * e[1] * e[7] + | |
e[8] * e[3] * e[5] | |
rete[15] = e[0] * e[5] * e[10] - | |
e[0] * e[6] * e[9] - | |
e[4] * e[1] * e[10] + | |
e[4] * e[2] * e[9] + | |
e[8] * e[1] * e[6] - | |
e[8] * e[2] * e[5] | |
local det = e[0] * rete[0] + e[1] * rete[4] + e[2] * rete[8] + e[3] * rete[12] | |
if det == 0 then return self:clone() end -- << must return new matrix not self | |
det = 1.0 / det | |
for i=0, 15 do rete[i] = rete[i] * det end | |
return ret | |
end | |
Matrix = ffi.metatype(MatrixStructure, MatrixMetatable) | |
local mOut | |
local m = _new_matrix() | |
m:identity() | |
local ITERATIONS = 300000 | |
local t = os.clock() | |
for i=1, ITERATIONS do | |
mOut = m:mul(m:inverse()) | |
end | |
local time = os.clock() - t | |
print("time", time, "iterations per sec:", ITERATIONS / time) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment