Skip to content

Instantly share code, notes, and snippets.

@Validark
Created March 1, 2021 08:19
Show Gist options
  • Select an option

  • Save Validark/c86a3e92dde76d26e6893d796dfea89a to your computer and use it in GitHub Desktop.

Select an option

Save Validark/c86a3e92dde76d26e6893d796dfea89a to your computer and use it in GitHub Desktop.
This monstrosity is a giant test bench for figuring out which regular expression-based functions take linear or quadratic time. It can also be used just for performance comparisons
local R1 = 2^10
local R2 = 2^11 -- the size of strings you want to test
local find = string.find
local sub = string.sub
local gsub = string.gsub
local match = string.match
local gmatch = string.gmatch
local byte = string.byte
local candidates = {
[1] = function(str)
return match(str, ".*%S", match(str, "^%s*()"))
end;
-- [2] = function(str)
-- local start = match(str, "^%s*()")
-- local last = start
-- for k in gmatch(str, "%S+()") do
-- last = k
-- end
-- return sub(str, start, last)
-- end;
-- [3] = function(s)
-- return (s:gsub("^%s*(.-)%s*$", "%1"))
-- end;
-- from PiL2 20.4
-- [4] = function(s)
-- return s:match "^%s*(.-)%s*$"
-- end;
-- variant of (match)
-- [5] = function(s)
-- return s:gsub("^%s+", ""):gsub("%s+$", "")
-- end;
-- two gsub's
-- [6] = function(s)
-- return s:match"^%s*(.*)":match"(.-)%s*$"
-- end;
-- variant of (match)
-- [7] = function(s)
-- return s:match'^%s*(.*%S)' or ''
-- end;
-- warning: has bad performance when s:match'^%s*$' and #s is large
[8] = function(s)
return match(s, '^()%s*$') and '' or match(s, '^%s*(.*%S)')
end;
-- fixes performance problem in.
-- note: the '()' avoids the overhead of default string capture.
-- This overhead is small, ~ 10% for successful whitespace match call
-- alone, and may not be noticeable in the overall benchmarks here,
-- but there's little harm either. Instead replacing the first `match`
-- with a `find` has a similar effect, but that requires localizing
-- two functions in the variant below.
[9] = function(s)
return match(s,'^()%s*$') and '' or match(s,'^%s*(.*%S)')
end;
-- variant of (localize functions)
-- [10] = function(s)
-- local i1,i2 = find(s,'^%s*')
-- if i2 >= i1 then
-- s = sub(s,i2+1)
-- end
-- local i1,i2 = find(s,'%s*$')
-- if i2 >= i1 then
-- s = sub(s,1,i1-1)
-- end
-- return s
-- end;
-- based on penlight 0.7.2
-- [11] = function(s)
-- local _, i1 = find(s,'^%s*')
-- local i2 = find(s,'%s*$')
-- return sub(s, i1 + 1, i2 - 1)
-- end;
-- simplification of
-- [12] = function(s)
-- local a = s:match('^%s*()')
-- local b = s:match('()%s*$', a)
-- return s:sub(a,b-1)
-- end;
-- variant of (match)
[13] = function(s)
local n = match(s, "()%S")
return n and match(s, ".*%S", n) or ""
end;
-- variant of (use n position)
-- http://lua-users.org/lists/lua-l/2009-12/msg00904.html
[14] = function(s)
local from = match(s, "^%s*()")
return from > #s and "" or match(s, ".*%S", from)
end;
-- [15] = function(s)
-- return string.match(s, '^%s*(.*%S)%s*$') or ''
-- end;
-- [16] = function(str)
-- return string.match(str, "^%s*(.-)%s*$")
-- end;
[17] = function(str)
-- trim start
return string.match(str, "^%s*(.-)$")
end;
[18] = function(str)
-- trim End
local from = match(str, "^%s*()")
return from > #str and "" or sub(str, 1, match(str, ".*()%S"))
end;
[19] = function(str)
-- trim start
local from = match(str, "^%s*()")
return from > #str and "" or sub(str, from)
end;
[20] = function(str)
-- trim start
return sub(str, match(str, "^%s*()"))
end;
-- [21] = function(str)
-- for i = -1, -#str, -1 do
-- if byte(str, i) ~= 0x20 then
-- return sub(str, 1, i)
-- end
-- end
-- return ""
-- end;
[22] = function(str)
-- trim End
local from = match(str, "^%s*()")
return from > #str and "" or sub(str, 1, match(str, ".*()%S"))
end;
[23] = function(str)
-- trim End
local from = match(str, "^%s*()")
return from > #str and "" or match(str, ".*%S")
end;
[24] = function(str)
-- trim End
local _, from = find(str, "^%s*")
return from == #str and "" or match(str, ".*%S")
end;
-- [25] = function(str)
-- return string.gsub(str, "%f[%s](%s+)$", "")
-- end;
[26] = function(str)
-- trim End
return find(str, "^%s*$") == nil and match(str, ".*%S") or ""
end;
[27] = function(str)
local _, from = string.find(str, "^%s*")
return from == #str and "" or sub(str, from + 1)
end;
}
-- start: 19, trim: 14
local i = R1
local strs = {
"." .. string.rep(" ", i),
"." .. string.rep(" ", i) .. ".",
string.rep(" ", i) .. ".",
string.rep(" ", i),
string.rep(" .", i // 2),
string.rep(".", i)
}
local template = {}
for m = 1, #strs + 1 do template[m] = -(0/0) end
local prevs = setmetatable({}, { __index = function(self, i) local v = table.move(template, 1, #template, 1, {}) self[i] = v return v end })
while i <= R2 do
print(i)
for k, func in pairs(candidates) do
local strs = {
"." .. string.rep(" ", i),
"." .. string.rep(" ", i) .. ".",
string.rep(" ", i) .. ".",
string.rep(" ", i),
string.rep(" .", i // 2),
string.rep(".", i)
}
local sum = 0
for j, str in ipairs(strs) do
local t1 = os.clock()
for _ = 1, 1000 do
func(str)
end
local t2 = os.clock()
local dt = t2 - t1
sum = sum + dt
print("f" .. k, "s" .. j, dt, dt / prevs[k][j])
prevs[k][j] = dt
end
print("\tf" .. k, sum, sum / prevs[k][#strs + 1])
prevs[k][#strs + 1] = sum
end
i = i * 2
end
local t = {}
for k in pairs(candidates) do
table.insert(t, k)
end
table.sort(t, function(a, b)
return prevs[a][#strs + 1] < prevs[b][#strs + 1]
end)
for _, x in ipairs(t) do
print("f" .. x, prevs[x][#strs + 1])
end
print("[" .. table.concat(t, ", ") .. "]")
--[[
1 3e-06 nil
2 9.9999999999992e-07 0.33333333333331
4 1e-06 1.0000000000001
8 1e-06 1.0
16 2e-06 2.0
32 8e-06 3.9999999999999
64 2.5e-05 3.125
128 9e-05 3.6
256 0.000359 3.9888888888889
512 0.001396 3.8885793871866
1024 0.005458 3.9097421203438
2048 0.018888 3.4606082814218
4096 0.074706 3.955209656925
8192 0.298921 4.0012984231521
16384 1.192664 3.9898969961963
32768 4.787684 4.0142772817826
65536 19.25605 4.0219968569354
131072 77.239331 4.0111721251243
--]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment