Created
March 1, 2021 08:19
-
-
Save Validark/c86a3e92dde76d26e6893d796dfea89a to your computer and use it in GitHub Desktop.
This monstrosity is a giant test bench for figuring out which regular expression-based functions take linear or quadratic time. It can also be used just for performance comparisons
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| local R1 = 2^10 | |
| local R2 = 2^11 -- the size of strings you want to test | |
| local find = string.find | |
| local sub = string.sub | |
| local gsub = string.gsub | |
| local match = string.match | |
| local gmatch = string.gmatch | |
| local byte = string.byte | |
| local candidates = { | |
| [1] = function(str) | |
| return match(str, ".*%S", match(str, "^%s*()")) | |
| end; | |
| -- [2] = function(str) | |
| -- local start = match(str, "^%s*()") | |
| -- local last = start | |
| -- for k in gmatch(str, "%S+()") do | |
| -- last = k | |
| -- end | |
| -- return sub(str, start, last) | |
| -- end; | |
| -- [3] = function(s) | |
| -- return (s:gsub("^%s*(.-)%s*$", "%1")) | |
| -- end; | |
| -- from PiL2 20.4 | |
| -- [4] = function(s) | |
| -- return s:match "^%s*(.-)%s*$" | |
| -- end; | |
| -- variant of (match) | |
| -- [5] = function(s) | |
| -- return s:gsub("^%s+", ""):gsub("%s+$", "") | |
| -- end; | |
| -- two gsub's | |
| -- [6] = function(s) | |
| -- return s:match"^%s*(.*)":match"(.-)%s*$" | |
| -- end; | |
| -- variant of (match) | |
| -- [7] = function(s) | |
| -- return s:match'^%s*(.*%S)' or '' | |
| -- end; | |
| -- warning: has bad performance when s:match'^%s*$' and #s is large | |
| [8] = function(s) | |
| return match(s, '^()%s*$') and '' or match(s, '^%s*(.*%S)') | |
| end; | |
| -- fixes performance problem in. | |
| -- note: the '()' avoids the overhead of default string capture. | |
| -- This overhead is small, ~ 10% for successful whitespace match call | |
| -- alone, and may not be noticeable in the overall benchmarks here, | |
| -- but there's little harm either. Instead replacing the first `match` | |
| -- with a `find` has a similar effect, but that requires localizing | |
| -- two functions in the variant below. | |
| [9] = function(s) | |
| return match(s,'^()%s*$') and '' or match(s,'^%s*(.*%S)') | |
| end; | |
| -- variant of (localize functions) | |
| -- [10] = function(s) | |
| -- local i1,i2 = find(s,'^%s*') | |
| -- if i2 >= i1 then | |
| -- s = sub(s,i2+1) | |
| -- end | |
| -- local i1,i2 = find(s,'%s*$') | |
| -- if i2 >= i1 then | |
| -- s = sub(s,1,i1-1) | |
| -- end | |
| -- return s | |
| -- end; | |
| -- based on penlight 0.7.2 | |
| -- [11] = function(s) | |
| -- local _, i1 = find(s,'^%s*') | |
| -- local i2 = find(s,'%s*$') | |
| -- return sub(s, i1 + 1, i2 - 1) | |
| -- end; | |
| -- simplification of | |
| -- [12] = function(s) | |
| -- local a = s:match('^%s*()') | |
| -- local b = s:match('()%s*$', a) | |
| -- return s:sub(a,b-1) | |
| -- end; | |
| -- variant of (match) | |
| [13] = function(s) | |
| local n = match(s, "()%S") | |
| return n and match(s, ".*%S", n) or "" | |
| end; | |
| -- variant of (use n position) | |
| -- http://lua-users.org/lists/lua-l/2009-12/msg00904.html | |
| [14] = function(s) | |
| local from = match(s, "^%s*()") | |
| return from > #s and "" or match(s, ".*%S", from) | |
| end; | |
| -- [15] = function(s) | |
| -- return string.match(s, '^%s*(.*%S)%s*$') or '' | |
| -- end; | |
| -- [16] = function(str) | |
| -- return string.match(str, "^%s*(.-)%s*$") | |
| -- end; | |
| [17] = function(str) | |
| -- trim start | |
| return string.match(str, "^%s*(.-)$") | |
| end; | |
| [18] = function(str) | |
| -- trim End | |
| local from = match(str, "^%s*()") | |
| return from > #str and "" or sub(str, 1, match(str, ".*()%S")) | |
| end; | |
| [19] = function(str) | |
| -- trim start | |
| local from = match(str, "^%s*()") | |
| return from > #str and "" or sub(str, from) | |
| end; | |
| [20] = function(str) | |
| -- trim start | |
| return sub(str, match(str, "^%s*()")) | |
| end; | |
| -- [21] = function(str) | |
| -- for i = -1, -#str, -1 do | |
| -- if byte(str, i) ~= 0x20 then | |
| -- return sub(str, 1, i) | |
| -- end | |
| -- end | |
| -- return "" | |
| -- end; | |
| [22] = function(str) | |
| -- trim End | |
| local from = match(str, "^%s*()") | |
| return from > #str and "" or sub(str, 1, match(str, ".*()%S")) | |
| end; | |
| [23] = function(str) | |
| -- trim End | |
| local from = match(str, "^%s*()") | |
| return from > #str and "" or match(str, ".*%S") | |
| end; | |
| [24] = function(str) | |
| -- trim End | |
| local _, from = find(str, "^%s*") | |
| return from == #str and "" or match(str, ".*%S") | |
| end; | |
| -- [25] = function(str) | |
| -- return string.gsub(str, "%f[%s](%s+)$", "") | |
| -- end; | |
| [26] = function(str) | |
| -- trim End | |
| return find(str, "^%s*$") == nil and match(str, ".*%S") or "" | |
| end; | |
| [27] = function(str) | |
| local _, from = string.find(str, "^%s*") | |
| return from == #str and "" or sub(str, from + 1) | |
| end; | |
| } | |
| -- start: 19, trim: 14 | |
| local i = R1 | |
| local strs = { | |
| "." .. string.rep(" ", i), | |
| "." .. string.rep(" ", i) .. ".", | |
| string.rep(" ", i) .. ".", | |
| string.rep(" ", i), | |
| string.rep(" .", i // 2), | |
| string.rep(".", i) | |
| } | |
| local template = {} | |
| for m = 1, #strs + 1 do template[m] = -(0/0) end | |
| local prevs = setmetatable({}, { __index = function(self, i) local v = table.move(template, 1, #template, 1, {}) self[i] = v return v end }) | |
| while i <= R2 do | |
| print(i) | |
| for k, func in pairs(candidates) do | |
| local strs = { | |
| "." .. string.rep(" ", i), | |
| "." .. string.rep(" ", i) .. ".", | |
| string.rep(" ", i) .. ".", | |
| string.rep(" ", i), | |
| string.rep(" .", i // 2), | |
| string.rep(".", i) | |
| } | |
| local sum = 0 | |
| for j, str in ipairs(strs) do | |
| local t1 = os.clock() | |
| for _ = 1, 1000 do | |
| func(str) | |
| end | |
| local t2 = os.clock() | |
| local dt = t2 - t1 | |
| sum = sum + dt | |
| print("f" .. k, "s" .. j, dt, dt / prevs[k][j]) | |
| prevs[k][j] = dt | |
| end | |
| print("\tf" .. k, sum, sum / prevs[k][#strs + 1]) | |
| prevs[k][#strs + 1] = sum | |
| end | |
| i = i * 2 | |
| end | |
| local t = {} | |
| for k in pairs(candidates) do | |
| table.insert(t, k) | |
| end | |
| table.sort(t, function(a, b) | |
| return prevs[a][#strs + 1] < prevs[b][#strs + 1] | |
| end) | |
| for _, x in ipairs(t) do | |
| print("f" .. x, prevs[x][#strs + 1]) | |
| end | |
| print("[" .. table.concat(t, ", ") .. "]") | |
| --[[ | |
| 1 3e-06 nil | |
| 2 9.9999999999992e-07 0.33333333333331 | |
| 4 1e-06 1.0000000000001 | |
| 8 1e-06 1.0 | |
| 16 2e-06 2.0 | |
| 32 8e-06 3.9999999999999 | |
| 64 2.5e-05 3.125 | |
| 128 9e-05 3.6 | |
| 256 0.000359 3.9888888888889 | |
| 512 0.001396 3.8885793871866 | |
| 1024 0.005458 3.9097421203438 | |
| 2048 0.018888 3.4606082814218 | |
| 4096 0.074706 3.955209656925 | |
| 8192 0.298921 4.0012984231521 | |
| 16384 1.192664 3.9898969961963 | |
| 32768 4.787684 4.0142772817826 | |
| 65536 19.25605 4.0219968569354 | |
| 131072 77.239331 4.0111721251243 | |
| --]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment