Skip to content

Instantly share code, notes, and snippets.

@davisp
Created June 2, 2020 19:35
Show Gist options
  • Select an option

  • Save davisp/21d6217619ae511ad319e32ab24eea44 to your computer and use it in GitHub Desktop.

Select an option

Save davisp/21d6217619ae511ad319e32ab24eea44 to your computer and use it in GitHub Desktop.
-module(ebtree).
-export([
new/1,
to_list/1,
insert/3,
remove/2,
lookup/2,
lookup/3,
show_nodes/1
]).
-ifdef(TEST).
-export([
run_test/1,
run_test/2
]).
-endif.
-record(ebtree, {
min_degree,
min_keys,
max_keys,
root,
tab
}).
-record(node, {
id = erlang:unique_integer([positive, monotonic]),
level = 0,
members = []
}).
new(MinDegree) when is_integer(MinDegree), MinDegree >= 2 ->
Tab = ets:new(ebtree, [{keypos, #node.id}]),
Root = #node{},
Tree = #ebtree{
min_degree = MinDegree,
min_keys = MinDegree - 1,
max_keys = 2 * MinDegree - 1,
root = Root#node.id,
tab = Tab
},
store_node(Tree, Root),
Tree.
insert(#ebtree{} = Tree, Key, Value) ->
Root = read_node(Tree, Tree#ebtree.root),
NewRoot = insert(Tree, Root, Key, Value),
case length(NewRoot#node.members) > Tree#ebtree.max_keys of
true ->
NextRoot = #node{
level = Root#node.level + 1,
members = split(Tree, NewRoot)
},
store_node(Tree#ebtree{root = NextRoot#node.id}, NextRoot),
{ok, Tree#ebtree{root = NextRoot#node.id}};
false when NewRoot == Root ->
{ok, Tree};
false when NewRoot#node.id == Tree#ebtree.root ->
store_node(Tree, NewRoot),
{ok, Tree}
end.
insert(_Tree, #node{level = 0} = Node, Key, Value) ->
% The behavior of usort here is guaranteed
% to remove all but the first occurence, thus
% insertion of a key that exists already exists
% behaves as if we overwrote the new value.
Node#node{
members = lists:ukeysort(1, [{Key, Value} | Node#node.members])
};
insert(Tree, #node{level = L} = Node, Key, Value) when L > 0 ->
{_ChildKey, ChildId, RestMembers} = extract(Key, Node#node.members),
Child = read_node(Tree, ChildId),
NewChild = insert(Tree, Child, Key, Value),
ToAdd = case length(NewChild#node.members) > Tree#ebtree.max_keys of
true -> split(Tree, NewChild);
false -> store_nodes(Tree, [NewChild])
end,
% Notice we're not doing a usort here so that
% we don't cover up bugs with duplicate keys.
Node#node{
members = lists:sort(ToAdd ++ RestMembers)
}.
remove(Tree, Key) ->
Root = read_node(Tree, Tree#ebtree.root),
NewRoot = remove(Tree, Root, Key),
store_node(Tree, NewRoot),
{ok, Tree#ebtree{root = NewRoot#node.id}}.
remove(_Tree, #node{level = 0} = Node, Key) ->
Node#node{
members = lists:keydelete(Key, 1, Node#node.members)
};
remove(Tree, Node, Key) ->
{ChildKey, ChildId, RestMembers} = extract(Key, Node#node.members),
Child = read_node(Tree, ChildId),
NewChild = remove(Tree, Child, Key),
case length(NewChild#node.members) >= Tree#ebtree.min_keys of
true when NewChild == Child ->
Node;
true ->
ToAdd = store_nodes(Tree, [NewChild]),
Node#node{
members = lists:sort(ToAdd ++ RestMembers)
};
false ->
remove_node(Tree, ChildId),
{_MKey, MergeId, RestRestMembers} = extract(ChildKey, RestMembers),
Merge = read_node(Tree, MergeId),
Combined = NewChild#node.members ++ Merge#node.members,
MergeAdd = join(Tree, Merge#node{members = lists:sort(Combined)}),
Node#node{
members = lists:sort(MergeAdd ++ RestRestMembers)
}
end.
lookup(Tree, Key) ->
lookup(Tree, Key, undefined).
lookup(Tree, Key, Default) ->
Root = read_node(Tree, Tree#ebtree.root),
lookup(Tree, Root, Key, Default).
lookup(_Tree, #node{level = 0} = Node, Key, Default) ->
case lists:keyfind(Key, 1, Node#node.members) of
{Key, Value} ->
Value;
false ->
Default
end;
lookup(Tree, Node, Key, Default) ->
{_, ChildId, _} = extract(Key, Node#node.members),
Child = read_node(Tree, ChildId),
lookup(Tree, Child, Key, Default).
to_list(Tree) ->
Root = read_node(Tree, Tree#ebtree.root),
lists:flatten(to_list(Tree, Root)).
to_list(_Tree, #node{level = 0} = Node) ->
Node#node.members;
to_list(Tree, #node{} = Node) ->
lists:map(fun({_, ChildId}) ->
Child = read_node(Tree, ChildId),
to_list(Tree, Child)
end, Node#node.members).
show_nodes(#ebtree{} = Tree) ->
lists:sort(ets:tab2list(Tree#ebtree.tab)).
extract(Key, KVs) ->
Tail = lists:dropwhile(fun({K, _}) ->
K < Key
end, KVs),
{ChildKey, ChildId} = case Tail of
[KV | _] -> KV;
[] -> lists:last(KVs)
end,
{ChildKey, ChildId, lists:keydelete(ChildKey, 1, KVs)}.
split(Tree, Node) ->
{Left, Right} = lists:split(Tree#ebtree.min_keys, Node#node.members),
LNode = #node{
level = Node#node.level,
members = Left
},
RNode = Node#node{
members = Right
},
store_nodes(Tree, [LNode, RNode]).
join(Tree, Node) ->
case length(Node#node.members) > Tree#ebtree.max_keys of
true ->
split(Tree, Node);
false ->
store_nodes(Tree, [Node])
end.
read_node(#ebtree{} = Tree, Id) ->
case ets:lookup(Tree#ebtree.tab, Id) of
[#node{} = Node] ->
Node;
[] ->
error(not_found)
end.
remove_node(#ebtree{} = Tree, Id) ->
case ets:lookup(Tree#ebtree.tab, Id) of
[#node{}] ->
true = ets:delete(Tree#ebtree.tab, Id);
[] ->
error(not_found)
end.
store_nodes(Tree, Nodes) ->
lists:map(fun(Node) ->
store_node(Tree, Node),
{K, _} = lists:last(Node#node.members),
{K, Node#node.id}
end, Nodes).
store_node(#ebtree{} = Tree, #node{} = Node) ->
validate_node(Tree, Node),
ets:insert(Tree#ebtree.tab, [Node]).
validate_node(#ebtree{root = RootId} = Tree, #node{id = NodeId} = Node) ->
NumKeys = length(Node#node.members),
{Keys, Values} = lists:unzip(Node#node.members),
SortedKeys = lists:usort(Keys),
if
NumKeys < Tree#ebtree.min_keys andalso RootId /= NodeId ->
erlang:error({too_few_keys, Node});
NumKeys > Tree#ebtree.max_keys ->
erlang:error({too_many_keys, Node});
SortedKeys /= Keys ->
erlang:error({key_sort_error, Node});
true ->
ok
end,
if Node#node.level == 0 -> ok; true ->
lists:foreach(fun(Value) ->
case ets:member(Tree#ebtree.tab, Value) of
true ->
ok;
false ->
erlang:error({invalid_child, Value})
end
end, Values)
end.
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
run_test() ->
run_test(3, 1000).
run_test(MinDegree) ->
run_test(MinDegree, 1000).
run_test(MinDegree, Iterations) ->
Tree = new(MinDegree),
Model = ets:new(ebtree_test, []),
run_test(Tree, Iterations, Model),
run_empty(Tree, Model).
run_test(Tree, 0, _) ->
% Test over
Tree;
run_test(Tree, Iters, Model) when Iters > 0 ->
Ops = [
{0.6, fun test_insert_new/2},
{0.2, fun test_insert_old/2},
{0.1, fun test_remove/2},
{0.09, fun test_lookup_exists/2},
{0.01, fun test_lookup_missing/2}
],
Fun = choose(Ops),
NewTree = Fun(Tree, Model),
case Iters rem 100 of
0 ->
TreeL = ?MODULE:to_list(NewTree),
ModelL = lists:sort(ets:tab2list(Model)),
?assertEqual(ModelL, TreeL);
_ ->
ok
end,
run_test(NewTree, Iters - 1, Model).
run_empty(Tree, Model) ->
EmptyTree = ets:foldl(fun({K, _}, TAcc) ->
{ok, T} = ?MODULE:remove(TAcc, K),
T
end, Tree, Model),
?assertEqual([], ?MODULE:to_list(EmptyTree)).
test_insert_new(Tree, Model) ->
K = new_key(Model),
V = rand:uniform(),
{ok, NewTree} = ?MODULE:insert(Tree, K, V),
true = ets:insert_new(Model, {K, V}),
NewTree.
test_insert_old(Tree, Model) ->
case ets:info(Model, size) > 0 of
true ->
K = old_key(Model),
V = rand:uniform(),
{ok, NewTree} = ?MODULE:insert(Tree, K, V),
true = ets:insert(Model, {K, V}),
NewTree;
false ->
Tree
end.
test_remove(Tree, Model) ->
case ets:info(Model, size) > 0 of
true ->
K = old_key(Model),
{ok, NewTree} = ?MODULE:remove(Tree, K),
true = ets:delete(Model, K),
NewTree;
false ->
Tree
end.
test_lookup_exists(Tree, Model) ->
case ets:info(Model, size) > 0 of
true ->
K = old_key(Model),
TV = ?MODULE:lookup(Tree, K, not_found),
[{_, MV}] = ets:lookup(Model, K),
?assertEqual(MV, TV),
Tree;
false ->
Tree
end.
test_lookup_missing(Tree, Model) ->
K = new_key(Model),
?assertEqual(not_found, ?MODULE:lookup(Tree, K, not_found)),
?assertEqual([], ets:lookup(Model, K)),
Tree.
new_key(Model) ->
K = rand:uniform(),
case ets:member(Model, K) of
true ->
% Quite unlikely...
new_key(Model);
false ->
K
end.
old_key(Model) ->
{Key, _} = ets:foldl(fun({K, _}, Acc) ->
case Acc of
undefined ->
{K, rand:uniform()};
{Old, V} ->
R = rand:uniform(),
case R > V of
true -> {K, R};
false -> {Old, V}
end
end
end, undefined, Model),
Key.
choose(Ops) ->
K = rand:uniform(),
choose(K, Ops).
choose(_, []) ->
erlang:error(invalid_op_probabilities);
choose(K, [{P, _V} | Rest]) when K > P ->
choose(K - P, Rest);
choose(_K, [{_P, V} | _]) ->
V.
-endif.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment