Last active
December 2, 2015 17:15
-
-
Save mraleph/e17323574ccbf8c56605 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Make this function more LuaJIT friendly while keeping its semantics intact. | |
-- Assume that t are always small table literals (<5 elements) and surrounding loop | |
-- is traced. | |
local function len(t) | |
return #t | |
end | |
-- Make this loop 50x faster by tweaking len above. | |
local sum = 0 | |
for i = 1, 10000000 do | |
sum = sum + len {i, i} + len {i} | |
end | |
print(sum) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local function len(t) | |
-- LuaJIT actually is unable to fold #{x, y, z} to 3 and strictly speaking its impossible to write | |
-- a folding rule that would always fold #{x, y, z} to 3 - because the actual value depends on | |
-- which of x, y, z are nil (or not nil), e.g. #{1, nil, 1} evaluates to 1 and #{1, 1, nil } to 2. | |
-- However LuaJIT is *able* to fold ({x,y, z})[4] to nil so the solution here is to | |
-- try and do linear probing following the definition of #t in Lua manual: | |
if t[1] == nil then return 0 | |
elseif t[2] == nil then return 1 | |
elseif t[3] == nil then return 2 | |
elseif t[4] == nil then return 3 | |
elseif t[5] == nil then return 4 | |
elseif t[6] == nil then return 5 | |
else | |
return #t -- slow path | |
end | |
end | |
local sum = 0 | |
for i = 1, 10000000 do | |
-- Now the tracer will actually fold away the usage of these temporary | |
-- tables - and their allocation will be sunk leading to a very tight loop: | |
-- | |
-- ->LOOP: | |
-- 0bcaff70 addsd xmm7, xmm1 | |
-- 0bcaff74 addsd xmm7, xmm0 | |
-- 0bcaff78 add ebp, +0x01 | |
-- 0bcaff7b cmp ebp, 0x00989680 | |
-- 0bcaff81 jle 0x0bcaff70 ->LOOP | |
-- 0bcaff83 jmp 0x0bca0028 ->6 | |
-- | |
sum = sum + len {i, i} + len {i} | |
end | |
print(sum) | |
-- | |
-- This koan might seem a purely theoretical excercise | |
-- but some Lua projects (e.g. torch) actually implement | |
-- multidimensional index-access operators like this: | |
-- | |
-- A[{i,j}] = B[{i, j}] + C[{i, j}] | |
-- | |
-- so if you would like to have this folded into something, ahm, | |
-- decently performant by the JIT - then you have to use this trick. | |
-- | |
-- Of course, somebody could just go and add a folding rule to LuaJIT to | |
-- at least handle #{x_1, ..., x_n} when elements are known to not be nil. | |
-- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment