Skip to content

Instantly share code, notes, and snippets.

@ochaton
Created November 26, 2019 12:19
Show Gist options
  • Save ochaton/dcdabc7fc881e2fa257df7192acac17a to your computer and use it in GitHub Desktop.
Save ochaton/dcdabc7fc881e2fa257df7192acac17a to your computer and use it in GitHub Desktop.
ffi prefix-tree
-- v: 1
local ffi = require 'ffi'
local C = ffi.C
ffi.cdef[[
typedef struct node {
uint8_t nkids;
struct node **kids;
uint32_t sprefix;
uint64_t svalue;
char *prefix;
char *value;
};
void *calloc(size_t count, size_t size);
void free(void *ptr);
void *realloc(void *ptr, size_t size);
]]
local NULL = ffi.cast('void *', 0)
local DBG = false
local function debug(...)
if DBG then
print(...)
end
end
local Node; Node = ffi.metatype('struct node', {
__gc = function(o)
debug("Destroying:", o)
if o.prefix ~= NULL then
debug("free", o.prefix)
C.free(o.prefix)
end
if o.value ~= NULL then
debug("free", o.value)
C.free(o.value)
end
if o.kids ~= NULL then
debug("free", o.kids)
C.free(o.kids)
end
end,
__new = function(ctype, init)
debug("Creating new node", init)
self = ffi.new(ctype)
self.nkids = init.nkids or 0
if init.kids then
self.kids = C.calloc(#init.kids, ffi.sizeof('struct node *'))
for i = 0, init.nkids - 1 do
self.kids[i] = ffi.cast('struct node *', init.kids[i])
end
end
if init.prefix then
self.prefix = C.calloc(#init.prefix, 1)
ffi.copy(self.prefix, init.prefix, #init.prefix)
self.sprefix = #init.prefix
end
if init.value then
self.value = C.calloc(#init.value, 1)
ffi.copy(self.value, init.value, #init.value)
self.svalue = #init.value
end
return self
end,
__tostring = function(self)
return ("Node{ '%s'{%d} => '%s'{%d}, nkids = %d }"):format(
ffi.string(self.prefix, self.sprefix), tonumber(self.sprefix),
ffi.string(self.value, self.svalue), tonumber(self.svalue),
self.nkids
)
end,
__index = {
newkid = function(self, kid)
self.nkids = self.nkids + 1
self.kids = C.realloc(self.kids, self.nkids * ffi.sizeof('struct node *'))
self.kids[self.nkids - 1] = kid
end,
split = function(self, prefix)
assert(type(prefix) == 'string', "prefix must be a lua string")
local right = Node{
prefix = ffi.string(self.prefix + #prefix, self.sprefix - #prefix),
}
right.value = self.value
right.svalue = self.svalue
self.prefix = C.realloc(NULL, #prefix)
self.sprefix = #prefix
ffi.copy(self.prefix, prefix, #prefix)
self.value = NULL
self.svalue = 0
right.kids = self.kids
right.nkids = self.nkids
self.kids = C.realloc(NULL, ffi.sizeof('struct node *'))
self.nkids = 1
self.kids[0] = right
return right
end
}
})
ffi.cdef[[
typedef struct tree {
struct node *root;
};
]]
local function common_prefix(p1, np1, p2, np2)
local pe1 = p1 + np1
local pe2 = p2 + np2
while p1 < pe1 and p2 < pe2 and p1[0] == p2[0] do
p1 = p1 + 1
p2 = p2 + 1
end
return p1, p2
end
local Tree = ffi.metatype('struct tree', {
__new = function(ctype, init)
return ffi.new(
ctype, {
root = Node {
prefix = "",
value = "",
nkids = 0
}
}
)
end,
__index = {
lookup = function(self, prefix)
if type(prefix) == 'string' then
prefix = ffi.new('char [?]', #prefix, prefix)
elseif type(prefix) ~= 'cdata' then
error("Expected string or ctype", 2)
end
local p = ffi.cast('char *', prefix)
local pe = p + ffi.sizeof(prefix)
local node = self.root
repeat
debug("Searching", ffi.string(p, pe - p), "in", node, node.prefix)
local suffix1, suffix2 = common_prefix(node.prefix, node.sprefix, p, pe - p)
if suffix1 < node.prefix + node.sprefix then
-- prefix of node wasn't satisfied
local pref = ffi.string(p, suffix2 - p) -- common prefix
local suf1 = ffi.string(suffix1, node.sprefix - (suffix1 - node.prefix)) -- suffix of node
local suf2 = ffi.string(suffix2, pe - suffix2) -- suffix of string to find
debug("node", node, "partially satisfied given prefix.", pref, suf1, suf2)
return node, pref, suf1, suf2
end
-- node fully satisfied prefix
if suffix2 == pe then
-- and prefix over
debug("full sat: ", node, suffix2)
return node, ffi.string(node.prefix, node.sprefix), "", ""
end
p = suffix2
-- prefix is not over yet. we must check kids
local kid
for i = 0, node.nkids - 1 do
debug("checking kid", node.kids[i], "with", ffi.string(p, pe - p))
if node.kids[i].prefix[0] == p[0] then
kid = node.kids[i]
break
end
end
if kid == nil then
-- kid not found
return node, ffi.string(node.prefix, node.sprefix), "", ffi.string(suffix2, pe - p)
end
-- kid found! We will check it!
node = kid
until p == pe or node == NULL
error("Loop must never come here", 2)
end,
insert = function(self, prefix, value)
local closest, pref, suf1, suf2 = self:lookup(prefix)
if suf1 == "" and suf2 == "" then
return false, ("Node with prefix %s already exists"):format(pref)
end
if suf1 == "" then
-- suf2 ~= "" -- insert new node
local node = Node{ prefix = suf2, value = tostring(value), nkids = 0 }
closest:newkid(node)
return true
end
-- suf1 ~= "":
-- 1. we need to split the node
local kidney = closest:split(pref)
if suf2 ~= "" then
-- We need to insert new node
local node = Node{ prefix = suf2, value = tostring(value), nkids = 0 }
closest:newkid(node)
return true
end
-- otherwise prefix is a subprefix of existing node:
closest.value = ffi.cast('char [?]', #value, value)
return true
end,
}
})
local T = Tree()
debug(T, T.root)
debug(T:lookup("prefix"))
debug("insert", T:insert("prefix", "v1"))
debug(T:lookup("prefix"))
debug("insert preb", T:insert("preb", "v2"))
debug(T:lookup("prefix"))
debug(T:lookup("preb"))
collectgarbage('collect')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment