Skip to content

Instantly share code, notes, and snippets.

@jsbattig
Last active December 24, 2015 03:43
Show Gist options
  • Save jsbattig/5a65c2f153539ddc3b9c to your computer and use it in GitHub Desktop.
Save jsbattig/5a65c2f153539ddc3b9c to your computer and use it in GitHub Desktop.
Pointer-Trie implementation for fast insertion, finding, and removal of Pointers in a space-efficient manner (about 30% space efficiency in average)
unit PointerTrie;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils;
const
BitsPerByte = 8;
BitsForChildIndexPerBucket = 4;
BucketMask = $F;
PointerTrieDepth = sizeof(Pointer) * BitsPerByte div BitsForChildIndexPerBucket;
ChildrenPerBucket = BitsForChildIndexPerBucket * BitsForChildIndexPerBucket;
LastMidBranchNode = PointerTrieDepth - 3;
type
PPointerTrieBaseNode = ^TPointerTrieBaseNode;
TPointerTrieBaseNode = record
ChildrenCount : Word;
Busy : Word;
end;
PPointerTrieBranchNode = ^TPointerTrieBranchNode;
TPointerTrieNodeArray = array[0..ChildrenPerBucket - 1] of PPointerTrieBranchNode;
PPointerTrieNodeArray = ^TPointerTrieNodeArray;
TPointerTrieBranchNode = record
Base : TPointerTrieBaseNode;
ChildIndex : Int64;
Children : Pointer;
end;
PPointerTrieLeafNode = ^TPointerTrieLeafNode;
TPointerTrieLeafNode = record
Base : TPointerTrieBaseNode;
end;
TPointerTrieLeafNodeArray = array[0..ChildrenPerBucket - 1] of TPointerTrieLeafNode;
PPointerTrieLeafNodeArray = ^TPointerTrieLeafNodeArray;
TPointerTrieStats = record
NodeCount : Integer;
TotalMemAlloced : Int64;
end;
TPointerTrieIterator = record
AtEnd : Boolean;
Level : SmallInt;
BreadCrumbs : array[0..PointerTrieDepth - 1] of SmallInt;
ANodeStack : array[0..PointerTrieDepth - 1] of PPointerTrieBaseNode;
LastResult : Pointer;
end;
{ TPointerTrie }
EPointerTrieDuplicate = class(Exception);
TPointerTrie = class
private
FRoot : PPointerTrieBranchNode;
FAllowDuplicates : Boolean;
FStats : TPointerTrieStats;
FCount : Integer;
function NewPointerTrieBranchNode : PPointerTrieBranchNode;
procedure FreeTrieNode(ANode : PPointerTrieBranchNode; Level : Byte);
function AddChild(ANode : PPointerTrieBranchNode; Level : Byte) : Integer;
function GetBitFieldIndex(p : Pointer; Level : Byte) : Byte; inline;
function GetChildIndex(ANode : PPointerTrieBranchNode; BitFieldIndex : Byte) : Byte; inline;
procedure SetChildIndex(ANode : PPointerTrieBranchNode; BitFieldIndex, ChildIndex : Byte); inline;
function GetBusyIndicator(ANode : PPointerTrieBaseNode; BitFieldIndex : Byte) : Boolean; inline;
procedure SetBusyIndicator(ANode : PPointerTrieBaseNode; BitFieldIndex : Byte; Value : Boolean); inline;
function InternalFind(p : Pointer; out ANode : PPointerTrieLeafNode; out AChildIndex : Byte) : Boolean;
function NextNode(ACurNode : PPointerTrieBranchNode; ALevel, AChildIndex : Byte) : Pointer; inline;
public
constructor Create;
destructor Destroy; override;
procedure Clear;
procedure Add(p : Pointer);
function Find(p : Pointer) : Boolean;
procedure Remove(p : Pointer);
procedure InitIterator(out AIterator : TPointerTrieIterator);
function Next(var AIterator : TPointerTrieIterator) : Pointer;
property AllowDuplicates : Boolean read FAllowDuplicates write FAllowDuplicates;
property Count : Integer read FCount;
property Stats : TPointerTrieStats read FStats;
end;
implementation
const
ChildIndexShiftArray : array[0..PointerTrieDepth - 1] of Byte = (28, 24, 20, 16, 12, 8, 4, 0);
CleanChildIndexMask : array[0..ChildrenPerBucket - 1] of Int64 =
($FFFFFFFFFFFFFFF0, $FFFFFFFFFFFFFF0F, $FFFFFFFFFFFFF0FF, $FFFFFFFFFFFF0FFF,
$FFFFFFFFFFF0FFFF, $FFFFFFFFFF0FFFFF, $FFFFFFFFF0FFFFFF, $FFFFFFFF0FFFFFFF,
$FFFFFFF0FFFFFFFF, $FFFFFF0FFFFFFFFF, $FFFFF0FFFFFFFFFF, $FFFF0FFFFFFFFFFF,
$FFF0FFFFFFFFFFFF, $FF0FFFFFFFFFFFFF, $F0FFFFFFFFFFFFFF, $0FFFFFFFFFFFFFFF);
resourcestring
STR_DUPLICATESNOTALLOWED = 'Duplicates not allowed';
{ TPointerTrie }
constructor TPointerTrie.Create;
begin
inherited Create;
FRoot := NewPointerTrieBranchNode();
end;
destructor TPointerTrie.Destroy;
begin
FreeTrieNode(@FRoot^.Base, 0);
inherited Destroy;
end;
function TPointerTrie.NewPointerTrieBranchNode : PPointerTrieBranchNode;
begin
GetMem(Result, sizeof(TPointerTrieBranchNode));
Result^.Base.Busy := 0;
Result^.Base.ChildrenCount := 0;
Result^.Children := nil;
Result^.ChildIndex := 0;
inc(FStats.NodeCount);
inc(FStats.TotalMemAlloced, sizeof(TPointerTrieBranchNode));
end;
procedure TPointerTrie.FreeTrieNode(ANode: PPointerTrieBranchNode; Level: Byte);
var
i : integer;
begin
if Level <= LastMidBranchNode then
for i := 0 to ANode^.Base.ChildrenCount - 1 do
FreeTrieNode(PPointerTrieNodeArray(ANode^.Children)^[i], Level + 1);
if (ANode^.Base.ChildrenCount > 0) and (ANode^.Children <> nil) then
FreeMem(ANode^.Children);
FreeMem(ANode);
end;
function TPointerTrie.AddChild(ANode: PPointerTrieBranchNode; Level: Byte
): Integer;
procedure ReallocArray(var Arr : Pointer; NewCount, ObjSize : Cardinal);
begin
ReallocMem(Arr, NewCount * ObjSize);
inc(FStats.TotalMemAlloced, ObjSize);
end;
begin
if Level <= LastMidBranchNode then
begin
ReallocArray(ANode^.Children, ANode^.Base.ChildrenCount + 1, sizeof(Pointer));
PPointerTrieNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount] := NewPointerTrieBranchNode();
end
else
begin
ReallocArray(ANode^.Children, ANode^.Base.ChildrenCount + 1, sizeof(TPointerTrieLeafNode));
PPointerTrieLeafNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount].Base.Busy := 0;
PPointerTrieLeafNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount].Base.ChildrenCount := 0;
end;
Result := ANode^.Base.ChildrenCount;
inc(ANode^.Base.ChildrenCount);
end;
function TPointerTrie.GetBitFieldIndex(p: Pointer; Level: Byte): Byte;
begin
Result := ({%H-}NativeInt(p) shr ChildIndexShiftArray[Level]) and BucketMask;
end;
function TPointerTrie.GetChildIndex(ANode: PPointerTrieBranchNode; BitFieldIndex: Byte
): Byte;
begin
Result := (ANode^.ChildIndex shr (Int64(BitFieldIndex) * BitsForChildIndexPerBucket)) and BucketMask;
end;
procedure TPointerTrie.SetChildIndex(ANode: PPointerTrieBranchNode; BitFieldIndex,
ChildIndex: Byte);
begin
ANode^.ChildIndex := ANode^.ChildIndex and CleanChildIndexMask[BitFieldIndex];
ANode^.ChildIndex := ANode^.ChildIndex or (Int64(ChildIndex) shl (BitFieldIndex * BitsForChildIndexPerBucket));
end;
function TPointerTrie.GetBusyIndicator(ANode: PPointerTrieBaseNode;
BitFieldIndex: Byte): Boolean;
begin
Result := (ANode^.Busy and (Word(1) shl BitFieldIndex)) <> 0;
end;
procedure TPointerTrie.SetBusyIndicator(ANode: PPointerTrieBaseNode;
BitFieldIndex: Byte; Value: Boolean);
begin
if Value then
ANode^.Busy := ANode^.Busy or (Word(1) shl BitFieldIndex)
else ANode^.Busy := ANode^.Busy and not (Word(1) shl BitFieldIndex);
end;
function TPointerTrie.InternalFind(p: Pointer; out ANode: PPointerTrieLeafNode;
out AChildIndex: Byte): Boolean;
var
i, BitFieldIndex : Byte;
CurNode : PPointerTrieBaseNode;
begin
ANode := nil;
Result := False;
AChildIndex := 0;
CurNode := @FRoot^.Base;
for i := 0 to PointerTrieDepth - 1 do
begin
BitFieldIndex := GetBitFieldIndex(p, i);
if not GetBusyIndicator(CurNode, BitFieldIndex) then
exit;
if i = PointerTrieDepth - 1 then
break;
AChildIndex := GetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex);
CurNode := NextNode(PPointerTrieBranchNode(CurNode), i, AChildIndex);
end;
ANode := PPointerTrieLeafNode(CurNode);
Result := True;
end;
function TPointerTrie.NextNode(ACurNode: PPointerTrieBranchNode; ALevel,
AChildIndex: Byte): Pointer;
begin
if ALevel <= LastMidBranchNode then
Result := PPointerTrieNodeArray(ACurNode^.Children)^[AChildIndex]
else Result := @PPointerTrieLeafNodeArray(ACurNode^.Children)^[AChildIndex];
end;
procedure TPointerTrie.Clear;
begin
FreeTrieNode(@FRoot^.Base, 0);
FStats.TotalMemAlloced := 0;
FStats.NodeCount := 0;
FCount := 0;
end;
procedure TPointerTrie.Add(p: Pointer);
var
i, BitFieldIndex, ChildIndex : Byte;
CurNode : PPointerTrieBaseNode;
begin
CurNode := @FRoot^.Base;
for i := 0 to PointerTrieDepth - 1 do
begin
BitFieldIndex := GetBitFieldIndex(p, i);
if (not FAllowDuplicates) and ( i = PointerTrieDepth - 1) and
GetBusyIndicator(CurNode, BitFieldIndex) then
raise EPointerTrieDuplicate.Create(STR_DUPLICATESNOTALLOWED);
if i < PointerTrieDepth - 1 then
if not GetBusyIndicator(CurNode, BitFieldIndex) then
begin
ChildIndex := AddChild(PPointerTrieBranchNode(CurNode), i);
SetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex, ChildIndex);
SetBusyIndicator(CurNode, BitFieldIndex, True);
end
else ChildIndex := GetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex)
else
begin
SetBusyIndicator(CurNode, BitFieldIndex, True);
break;
end;
CurNode := NextNode(PPointerTrieBranchNode(CurNode), i, ChildIndex);
end;
inc(FCount);
end;
function TPointerTrie.Find(p: Pointer): Boolean;
var
DummyChildIndex : Byte;
DummyNode : PPointerTrieLeafNode;
begin
Result := InternalFind(p, DummyNode, DummyChildIndex);
end;
procedure TPointerTrie.Remove(p: Pointer);
var
ChildIndex : Byte;
Node : PPointerTrieLeafNode;
begin
if InternalFind(p, Node, ChildIndex) then
begin
SetBusyIndicator(@Node^.Base, ChildIndex, False);
dec(FCount);
end;
end;
procedure TPointerTrie.InitIterator(out AIterator: TPointerTrieIterator);
var
i : Byte;
begin
AIterator.AtEnd := False;
AIterator.Level := 0;
AIterator.LastResult := nil;
for i := 0 to PointerTrieDepth - 1 do
begin
AIterator.BreadCrumbs[i] := 0;
AIterator.ANodeStack[i] := nil;
end;
end;
function TPointerTrie.Next(var AIterator: TPointerTrieIterator): Pointer;
begin
if AIterator.AtEnd then
begin
Result := nil;
exit;
end;
if AIterator.Level = 0 then
begin
AIterator.ANodeStack[0] := @FRoot^.Base;
Result := nil;
end
else Result := AIterator.LastResult;
repeat
while AIterator.BreadCrumbs[AIterator.Level] < ChildrenPerBucket do
begin
if GetBusyIndicator(AIterator.ANodeStack[AIterator.Level], AIterator.BreadCrumbs[AIterator.Level] ) then
begin
Result := {%H-}Pointer({%H-}NativeInt(Result) or AIterator.BreadCrumbs[AIterator.Level]);
inc(AIterator.Level);
if AIterator.Level >= PointerTrieDepth then
begin
inc(AIterator.BreadCrumbs[AIterator.Level - 1]);
dec(AIterator.Level);
AIterator.LastResult := Result;
exit;
end;
Result := {%H-}Pointer({%H-}NativeInt(Result) shl BitsForChildIndexPerBucket);
AIterator.ANodeStack[AIterator.Level] := NextNode(PPointerTrieBranchNode(AIterator.ANodeStack[AIterator.Level - 1]), AIterator.Level - 1,
GetChildIndex(PPointerTrieBranchNode(AIterator.ANodeStack[AIterator.Level - 1]),
AIterator.BreadCrumbs[AIterator.Level - 1]));
break;
end
else inc(AIterator.BreadCrumbs[AIterator.Level] );
end;
if AIterator.BreadCrumbs[AIterator.Level] >= ChildrenPerBucket then
begin
AIterator.BreadCrumbs[AIterator.Level] := 0;
dec(AIterator.Level);
if AIterator.Level >= 0 then
inc(AIterator.BreadCrumbs[AIterator.Level])
else break;
Result := {%H-}Pointer({%H-}NativeInt(Result) shr BitsForChildIndexPerBucket);
Result := {%H-}Pointer({%H-}NativeInt(Result) and not NativeInt(BucketMask));
end;
until False;
Result := nil;
AIterator.AtEnd := True;
end;
end.
@jsbattig
Copy link
Author

Still working on it. Need to add iteration support and direct indexed access to make for easy replacement of TList to hold pointers.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment