Created
June 2, 2020 19:35
-
-
Save davisp/21d6217619ae511ad319e32ab24eea44 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -module(ebtree). | |
| -export([ | |
| new/1, | |
| to_list/1, | |
| insert/3, | |
| remove/2, | |
| lookup/2, | |
| lookup/3, | |
| show_nodes/1 | |
| ]). | |
| -ifdef(TEST). | |
| -export([ | |
| run_test/1, | |
| run_test/2 | |
| ]). | |
| -endif. | |
| -record(ebtree, { | |
| min_degree, | |
| min_keys, | |
| max_keys, | |
| root, | |
| tab | |
| }). | |
| -record(node, { | |
| id = erlang:unique_integer([positive, monotonic]), | |
| level = 0, | |
| members = [] | |
| }). | |
| new(MinDegree) when is_integer(MinDegree), MinDegree >= 2 -> | |
| Tab = ets:new(ebtree, [{keypos, #node.id}]), | |
| Root = #node{}, | |
| Tree = #ebtree{ | |
| min_degree = MinDegree, | |
| min_keys = MinDegree - 1, | |
| max_keys = 2 * MinDegree - 1, | |
| root = Root#node.id, | |
| tab = Tab | |
| }, | |
| store_node(Tree, Root), | |
| Tree. | |
| insert(#ebtree{} = Tree, Key, Value) -> | |
| Root = read_node(Tree, Tree#ebtree.root), | |
| NewRoot = insert(Tree, Root, Key, Value), | |
| case length(NewRoot#node.members) > Tree#ebtree.max_keys of | |
| true -> | |
| NextRoot = #node{ | |
| level = Root#node.level + 1, | |
| members = split(Tree, NewRoot) | |
| }, | |
| store_node(Tree#ebtree{root = NextRoot#node.id}, NextRoot), | |
| {ok, Tree#ebtree{root = NextRoot#node.id}}; | |
| false when NewRoot == Root -> | |
| {ok, Tree}; | |
| false when NewRoot#node.id == Tree#ebtree.root -> | |
| store_node(Tree, NewRoot), | |
| {ok, Tree} | |
| end. | |
| insert(_Tree, #node{level = 0} = Node, Key, Value) -> | |
| % The behavior of usort here is guaranteed | |
| % to remove all but the first occurence, thus | |
| % insertion of a key that exists already exists | |
| % behaves as if we overwrote the new value. | |
| Node#node{ | |
| members = lists:ukeysort(1, [{Key, Value} | Node#node.members]) | |
| }; | |
| insert(Tree, #node{level = L} = Node, Key, Value) when L > 0 -> | |
| {_ChildKey, ChildId, RestMembers} = extract(Key, Node#node.members), | |
| Child = read_node(Tree, ChildId), | |
| NewChild = insert(Tree, Child, Key, Value), | |
| ToAdd = case length(NewChild#node.members) > Tree#ebtree.max_keys of | |
| true -> split(Tree, NewChild); | |
| false -> store_nodes(Tree, [NewChild]) | |
| end, | |
| % Notice we're not doing a usort here so that | |
| % we don't cover up bugs with duplicate keys. | |
| Node#node{ | |
| members = lists:sort(ToAdd ++ RestMembers) | |
| }. | |
| remove(Tree, Key) -> | |
| Root = read_node(Tree, Tree#ebtree.root), | |
| NewRoot = remove(Tree, Root, Key), | |
| store_node(Tree, NewRoot), | |
| {ok, Tree#ebtree{root = NewRoot#node.id}}. | |
| remove(_Tree, #node{level = 0} = Node, Key) -> | |
| Node#node{ | |
| members = lists:keydelete(Key, 1, Node#node.members) | |
| }; | |
| remove(Tree, Node, Key) -> | |
| {ChildKey, ChildId, RestMembers} = extract(Key, Node#node.members), | |
| Child = read_node(Tree, ChildId), | |
| NewChild = remove(Tree, Child, Key), | |
| case length(NewChild#node.members) >= Tree#ebtree.min_keys of | |
| true when NewChild == Child -> | |
| Node; | |
| true -> | |
| ToAdd = store_nodes(Tree, [NewChild]), | |
| Node#node{ | |
| members = lists:sort(ToAdd ++ RestMembers) | |
| }; | |
| false -> | |
| remove_node(Tree, ChildId), | |
| {_MKey, MergeId, RestRestMembers} = extract(ChildKey, RestMembers), | |
| Merge = read_node(Tree, MergeId), | |
| Combined = NewChild#node.members ++ Merge#node.members, | |
| MergeAdd = join(Tree, Merge#node{members = lists:sort(Combined)}), | |
| Node#node{ | |
| members = lists:sort(MergeAdd ++ RestRestMembers) | |
| } | |
| end. | |
| lookup(Tree, Key) -> | |
| lookup(Tree, Key, undefined). | |
| lookup(Tree, Key, Default) -> | |
| Root = read_node(Tree, Tree#ebtree.root), | |
| lookup(Tree, Root, Key, Default). | |
| lookup(_Tree, #node{level = 0} = Node, Key, Default) -> | |
| case lists:keyfind(Key, 1, Node#node.members) of | |
| {Key, Value} -> | |
| Value; | |
| false -> | |
| Default | |
| end; | |
| lookup(Tree, Node, Key, Default) -> | |
| {_, ChildId, _} = extract(Key, Node#node.members), | |
| Child = read_node(Tree, ChildId), | |
| lookup(Tree, Child, Key, Default). | |
| to_list(Tree) -> | |
| Root = read_node(Tree, Tree#ebtree.root), | |
| lists:flatten(to_list(Tree, Root)). | |
| to_list(_Tree, #node{level = 0} = Node) -> | |
| Node#node.members; | |
| to_list(Tree, #node{} = Node) -> | |
| lists:map(fun({_, ChildId}) -> | |
| Child = read_node(Tree, ChildId), | |
| to_list(Tree, Child) | |
| end, Node#node.members). | |
| show_nodes(#ebtree{} = Tree) -> | |
| lists:sort(ets:tab2list(Tree#ebtree.tab)). | |
| extract(Key, KVs) -> | |
| Tail = lists:dropwhile(fun({K, _}) -> | |
| K < Key | |
| end, KVs), | |
| {ChildKey, ChildId} = case Tail of | |
| [KV | _] -> KV; | |
| [] -> lists:last(KVs) | |
| end, | |
| {ChildKey, ChildId, lists:keydelete(ChildKey, 1, KVs)}. | |
| split(Tree, Node) -> | |
| {Left, Right} = lists:split(Tree#ebtree.min_keys, Node#node.members), | |
| LNode = #node{ | |
| level = Node#node.level, | |
| members = Left | |
| }, | |
| RNode = Node#node{ | |
| members = Right | |
| }, | |
| store_nodes(Tree, [LNode, RNode]). | |
| join(Tree, Node) -> | |
| case length(Node#node.members) > Tree#ebtree.max_keys of | |
| true -> | |
| split(Tree, Node); | |
| false -> | |
| store_nodes(Tree, [Node]) | |
| end. | |
| read_node(#ebtree{} = Tree, Id) -> | |
| case ets:lookup(Tree#ebtree.tab, Id) of | |
| [#node{} = Node] -> | |
| Node; | |
| [] -> | |
| error(not_found) | |
| end. | |
| remove_node(#ebtree{} = Tree, Id) -> | |
| case ets:lookup(Tree#ebtree.tab, Id) of | |
| [#node{}] -> | |
| true = ets:delete(Tree#ebtree.tab, Id); | |
| [] -> | |
| error(not_found) | |
| end. | |
| store_nodes(Tree, Nodes) -> | |
| lists:map(fun(Node) -> | |
| store_node(Tree, Node), | |
| {K, _} = lists:last(Node#node.members), | |
| {K, Node#node.id} | |
| end, Nodes). | |
| store_node(#ebtree{} = Tree, #node{} = Node) -> | |
| validate_node(Tree, Node), | |
| ets:insert(Tree#ebtree.tab, [Node]). | |
| validate_node(#ebtree{root = RootId} = Tree, #node{id = NodeId} = Node) -> | |
| NumKeys = length(Node#node.members), | |
| {Keys, Values} = lists:unzip(Node#node.members), | |
| SortedKeys = lists:usort(Keys), | |
| if | |
| NumKeys < Tree#ebtree.min_keys andalso RootId /= NodeId -> | |
| erlang:error({too_few_keys, Node}); | |
| NumKeys > Tree#ebtree.max_keys -> | |
| erlang:error({too_many_keys, Node}); | |
| SortedKeys /= Keys -> | |
| erlang:error({key_sort_error, Node}); | |
| true -> | |
| ok | |
| end, | |
| if Node#node.level == 0 -> ok; true -> | |
| lists:foreach(fun(Value) -> | |
| case ets:member(Tree#ebtree.tab, Value) of | |
| true -> | |
| ok; | |
| false -> | |
| erlang:error({invalid_child, Value}) | |
| end | |
| end, Values) | |
| end. | |
| -ifdef(TEST). | |
| -include_lib("eunit/include/eunit.hrl"). | |
| run_test() -> | |
| run_test(3, 1000). | |
| run_test(MinDegree) -> | |
| run_test(MinDegree, 1000). | |
| run_test(MinDegree, Iterations) -> | |
| Tree = new(MinDegree), | |
| Model = ets:new(ebtree_test, []), | |
| run_test(Tree, Iterations, Model), | |
| run_empty(Tree, Model). | |
| run_test(Tree, 0, _) -> | |
| % Test over | |
| Tree; | |
| run_test(Tree, Iters, Model) when Iters > 0 -> | |
| Ops = [ | |
| {0.6, fun test_insert_new/2}, | |
| {0.2, fun test_insert_old/2}, | |
| {0.1, fun test_remove/2}, | |
| {0.09, fun test_lookup_exists/2}, | |
| {0.01, fun test_lookup_missing/2} | |
| ], | |
| Fun = choose(Ops), | |
| NewTree = Fun(Tree, Model), | |
| case Iters rem 100 of | |
| 0 -> | |
| TreeL = ?MODULE:to_list(NewTree), | |
| ModelL = lists:sort(ets:tab2list(Model)), | |
| ?assertEqual(ModelL, TreeL); | |
| _ -> | |
| ok | |
| end, | |
| run_test(NewTree, Iters - 1, Model). | |
| run_empty(Tree, Model) -> | |
| EmptyTree = ets:foldl(fun({K, _}, TAcc) -> | |
| {ok, T} = ?MODULE:remove(TAcc, K), | |
| T | |
| end, Tree, Model), | |
| ?assertEqual([], ?MODULE:to_list(EmptyTree)). | |
| test_insert_new(Tree, Model) -> | |
| K = new_key(Model), | |
| V = rand:uniform(), | |
| {ok, NewTree} = ?MODULE:insert(Tree, K, V), | |
| true = ets:insert_new(Model, {K, V}), | |
| NewTree. | |
| test_insert_old(Tree, Model) -> | |
| case ets:info(Model, size) > 0 of | |
| true -> | |
| K = old_key(Model), | |
| V = rand:uniform(), | |
| {ok, NewTree} = ?MODULE:insert(Tree, K, V), | |
| true = ets:insert(Model, {K, V}), | |
| NewTree; | |
| false -> | |
| Tree | |
| end. | |
| test_remove(Tree, Model) -> | |
| case ets:info(Model, size) > 0 of | |
| true -> | |
| K = old_key(Model), | |
| {ok, NewTree} = ?MODULE:remove(Tree, K), | |
| true = ets:delete(Model, K), | |
| NewTree; | |
| false -> | |
| Tree | |
| end. | |
| test_lookup_exists(Tree, Model) -> | |
| case ets:info(Model, size) > 0 of | |
| true -> | |
| K = old_key(Model), | |
| TV = ?MODULE:lookup(Tree, K, not_found), | |
| [{_, MV}] = ets:lookup(Model, K), | |
| ?assertEqual(MV, TV), | |
| Tree; | |
| false -> | |
| Tree | |
| end. | |
| test_lookup_missing(Tree, Model) -> | |
| K = new_key(Model), | |
| ?assertEqual(not_found, ?MODULE:lookup(Tree, K, not_found)), | |
| ?assertEqual([], ets:lookup(Model, K)), | |
| Tree. | |
| new_key(Model) -> | |
| K = rand:uniform(), | |
| case ets:member(Model, K) of | |
| true -> | |
| % Quite unlikely... | |
| new_key(Model); | |
| false -> | |
| K | |
| end. | |
| old_key(Model) -> | |
| {Key, _} = ets:foldl(fun({K, _}, Acc) -> | |
| case Acc of | |
| undefined -> | |
| {K, rand:uniform()}; | |
| {Old, V} -> | |
| R = rand:uniform(), | |
| case R > V of | |
| true -> {K, R}; | |
| false -> {Old, V} | |
| end | |
| end | |
| end, undefined, Model), | |
| Key. | |
| choose(Ops) -> | |
| K = rand:uniform(), | |
| choose(K, Ops). | |
| choose(_, []) -> | |
| erlang:error(invalid_op_probabilities); | |
| choose(K, [{P, _V} | Rest]) when K > P -> | |
| choose(K - P, Rest); | |
| choose(_K, [{_P, V} | _]) -> | |
| V. | |
| -endif. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment