Last active
December 24, 2015 03:43
-
-
Save jsbattig/5a65c2f153539ddc3b9c to your computer and use it in GitHub Desktop.
Pointer-Trie implementation for fast insertion, finding, and removal of Pointers in a space-efficient manner (about 30% space efficiency in average)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
unit PointerTrie; | |
{$mode objfpc}{$H+} | |
interface | |
uses | |
Classes, SysUtils; | |
const | |
BitsPerByte = 8; | |
BitsForChildIndexPerBucket = 4; | |
BucketMask = $F; | |
PointerTrieDepth = sizeof(Pointer) * BitsPerByte div BitsForChildIndexPerBucket; | |
ChildrenPerBucket = BitsForChildIndexPerBucket * BitsForChildIndexPerBucket; | |
LastMidBranchNode = PointerTrieDepth - 3; | |
type | |
PPointerTrieBaseNode = ^TPointerTrieBaseNode; | |
TPointerTrieBaseNode = record | |
ChildrenCount : Word; | |
Busy : Word; | |
end; | |
PPointerTrieBranchNode = ^TPointerTrieBranchNode; | |
TPointerTrieNodeArray = array[0..ChildrenPerBucket - 1] of PPointerTrieBranchNode; | |
PPointerTrieNodeArray = ^TPointerTrieNodeArray; | |
TPointerTrieBranchNode = record | |
Base : TPointerTrieBaseNode; | |
ChildIndex : Int64; | |
Children : Pointer; | |
end; | |
PPointerTrieLeafNode = ^TPointerTrieLeafNode; | |
TPointerTrieLeafNode = record | |
Base : TPointerTrieBaseNode; | |
end; | |
TPointerTrieLeafNodeArray = array[0..ChildrenPerBucket - 1] of TPointerTrieLeafNode; | |
PPointerTrieLeafNodeArray = ^TPointerTrieLeafNodeArray; | |
TPointerTrieStats = record | |
NodeCount : Integer; | |
TotalMemAlloced : Int64; | |
end; | |
TPointerTrieIterator = record | |
AtEnd : Boolean; | |
Level : SmallInt; | |
BreadCrumbs : array[0..PointerTrieDepth - 1] of SmallInt; | |
ANodeStack : array[0..PointerTrieDepth - 1] of PPointerTrieBaseNode; | |
LastResult : Pointer; | |
end; | |
{ TPointerTrie } | |
EPointerTrieDuplicate = class(Exception); | |
TPointerTrie = class | |
private | |
FRoot : PPointerTrieBranchNode; | |
FAllowDuplicates : Boolean; | |
FStats : TPointerTrieStats; | |
FCount : Integer; | |
function NewPointerTrieBranchNode : PPointerTrieBranchNode; | |
procedure FreeTrieNode(ANode : PPointerTrieBranchNode; Level : Byte); | |
function AddChild(ANode : PPointerTrieBranchNode; Level : Byte) : Integer; | |
function GetBitFieldIndex(p : Pointer; Level : Byte) : Byte; inline; | |
function GetChildIndex(ANode : PPointerTrieBranchNode; BitFieldIndex : Byte) : Byte; inline; | |
procedure SetChildIndex(ANode : PPointerTrieBranchNode; BitFieldIndex, ChildIndex : Byte); inline; | |
function GetBusyIndicator(ANode : PPointerTrieBaseNode; BitFieldIndex : Byte) : Boolean; inline; | |
procedure SetBusyIndicator(ANode : PPointerTrieBaseNode; BitFieldIndex : Byte; Value : Boolean); inline; | |
function InternalFind(p : Pointer; out ANode : PPointerTrieLeafNode; out AChildIndex : Byte) : Boolean; | |
function NextNode(ACurNode : PPointerTrieBranchNode; ALevel, AChildIndex : Byte) : Pointer; inline; | |
public | |
constructor Create; | |
destructor Destroy; override; | |
procedure Clear; | |
procedure Add(p : Pointer); | |
function Find(p : Pointer) : Boolean; | |
procedure Remove(p : Pointer); | |
procedure InitIterator(out AIterator : TPointerTrieIterator); | |
function Next(var AIterator : TPointerTrieIterator) : Pointer; | |
property AllowDuplicates : Boolean read FAllowDuplicates write FAllowDuplicates; | |
property Count : Integer read FCount; | |
property Stats : TPointerTrieStats read FStats; | |
end; | |
implementation | |
const | |
ChildIndexShiftArray : array[0..PointerTrieDepth - 1] of Byte = (28, 24, 20, 16, 12, 8, 4, 0); | |
CleanChildIndexMask : array[0..ChildrenPerBucket - 1] of Int64 = | |
($FFFFFFFFFFFFFFF0, $FFFFFFFFFFFFFF0F, $FFFFFFFFFFFFF0FF, $FFFFFFFFFFFF0FFF, | |
$FFFFFFFFFFF0FFFF, $FFFFFFFFFF0FFFFF, $FFFFFFFFF0FFFFFF, $FFFFFFFF0FFFFFFF, | |
$FFFFFFF0FFFFFFFF, $FFFFFF0FFFFFFFFF, $FFFFF0FFFFFFFFFF, $FFFF0FFFFFFFFFFF, | |
$FFF0FFFFFFFFFFFF, $FF0FFFFFFFFFFFFF, $F0FFFFFFFFFFFFFF, $0FFFFFFFFFFFFFFF); | |
resourcestring | |
STR_DUPLICATESNOTALLOWED = 'Duplicates not allowed'; | |
{ TPointerTrie } | |
constructor TPointerTrie.Create; | |
begin | |
inherited Create; | |
FRoot := NewPointerTrieBranchNode(); | |
end; | |
destructor TPointerTrie.Destroy; | |
begin | |
FreeTrieNode(@FRoot^.Base, 0); | |
inherited Destroy; | |
end; | |
function TPointerTrie.NewPointerTrieBranchNode : PPointerTrieBranchNode; | |
begin | |
GetMem(Result, sizeof(TPointerTrieBranchNode)); | |
Result^.Base.Busy := 0; | |
Result^.Base.ChildrenCount := 0; | |
Result^.Children := nil; | |
Result^.ChildIndex := 0; | |
inc(FStats.NodeCount); | |
inc(FStats.TotalMemAlloced, sizeof(TPointerTrieBranchNode)); | |
end; | |
procedure TPointerTrie.FreeTrieNode(ANode: PPointerTrieBranchNode; Level: Byte); | |
var | |
i : integer; | |
begin | |
if Level <= LastMidBranchNode then | |
for i := 0 to ANode^.Base.ChildrenCount - 1 do | |
FreeTrieNode(PPointerTrieNodeArray(ANode^.Children)^[i], Level + 1); | |
if (ANode^.Base.ChildrenCount > 0) and (ANode^.Children <> nil) then | |
FreeMem(ANode^.Children); | |
FreeMem(ANode); | |
end; | |
function TPointerTrie.AddChild(ANode: PPointerTrieBranchNode; Level: Byte | |
): Integer; | |
procedure ReallocArray(var Arr : Pointer; NewCount, ObjSize : Cardinal); | |
begin | |
ReallocMem(Arr, NewCount * ObjSize); | |
inc(FStats.TotalMemAlloced, ObjSize); | |
end; | |
begin | |
if Level <= LastMidBranchNode then | |
begin | |
ReallocArray(ANode^.Children, ANode^.Base.ChildrenCount + 1, sizeof(Pointer)); | |
PPointerTrieNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount] := NewPointerTrieBranchNode(); | |
end | |
else | |
begin | |
ReallocArray(ANode^.Children, ANode^.Base.ChildrenCount + 1, sizeof(TPointerTrieLeafNode)); | |
PPointerTrieLeafNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount].Base.Busy := 0; | |
PPointerTrieLeafNodeArray(ANode^.Children)^[ANode^.Base.ChildrenCount].Base.ChildrenCount := 0; | |
end; | |
Result := ANode^.Base.ChildrenCount; | |
inc(ANode^.Base.ChildrenCount); | |
end; | |
function TPointerTrie.GetBitFieldIndex(p: Pointer; Level: Byte): Byte; | |
begin | |
Result := ({%H-}NativeInt(p) shr ChildIndexShiftArray[Level]) and BucketMask; | |
end; | |
function TPointerTrie.GetChildIndex(ANode: PPointerTrieBranchNode; BitFieldIndex: Byte | |
): Byte; | |
begin | |
Result := (ANode^.ChildIndex shr (Int64(BitFieldIndex) * BitsForChildIndexPerBucket)) and BucketMask; | |
end; | |
procedure TPointerTrie.SetChildIndex(ANode: PPointerTrieBranchNode; BitFieldIndex, | |
ChildIndex: Byte); | |
begin | |
ANode^.ChildIndex := ANode^.ChildIndex and CleanChildIndexMask[BitFieldIndex]; | |
ANode^.ChildIndex := ANode^.ChildIndex or (Int64(ChildIndex) shl (BitFieldIndex * BitsForChildIndexPerBucket)); | |
end; | |
function TPointerTrie.GetBusyIndicator(ANode: PPointerTrieBaseNode; | |
BitFieldIndex: Byte): Boolean; | |
begin | |
Result := (ANode^.Busy and (Word(1) shl BitFieldIndex)) <> 0; | |
end; | |
procedure TPointerTrie.SetBusyIndicator(ANode: PPointerTrieBaseNode; | |
BitFieldIndex: Byte; Value: Boolean); | |
begin | |
if Value then | |
ANode^.Busy := ANode^.Busy or (Word(1) shl BitFieldIndex) | |
else ANode^.Busy := ANode^.Busy and not (Word(1) shl BitFieldIndex); | |
end; | |
function TPointerTrie.InternalFind(p: Pointer; out ANode: PPointerTrieLeafNode; | |
out AChildIndex: Byte): Boolean; | |
var | |
i, BitFieldIndex : Byte; | |
CurNode : PPointerTrieBaseNode; | |
begin | |
ANode := nil; | |
Result := False; | |
AChildIndex := 0; | |
CurNode := @FRoot^.Base; | |
for i := 0 to PointerTrieDepth - 1 do | |
begin | |
BitFieldIndex := GetBitFieldIndex(p, i); | |
if not GetBusyIndicator(CurNode, BitFieldIndex) then | |
exit; | |
if i = PointerTrieDepth - 1 then | |
break; | |
AChildIndex := GetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex); | |
CurNode := NextNode(PPointerTrieBranchNode(CurNode), i, AChildIndex); | |
end; | |
ANode := PPointerTrieLeafNode(CurNode); | |
Result := True; | |
end; | |
function TPointerTrie.NextNode(ACurNode: PPointerTrieBranchNode; ALevel, | |
AChildIndex: Byte): Pointer; | |
begin | |
if ALevel <= LastMidBranchNode then | |
Result := PPointerTrieNodeArray(ACurNode^.Children)^[AChildIndex] | |
else Result := @PPointerTrieLeafNodeArray(ACurNode^.Children)^[AChildIndex]; | |
end; | |
procedure TPointerTrie.Clear; | |
begin | |
FreeTrieNode(@FRoot^.Base, 0); | |
FStats.TotalMemAlloced := 0; | |
FStats.NodeCount := 0; | |
FCount := 0; | |
end; | |
procedure TPointerTrie.Add(p: Pointer); | |
var | |
i, BitFieldIndex, ChildIndex : Byte; | |
CurNode : PPointerTrieBaseNode; | |
begin | |
CurNode := @FRoot^.Base; | |
for i := 0 to PointerTrieDepth - 1 do | |
begin | |
BitFieldIndex := GetBitFieldIndex(p, i); | |
if (not FAllowDuplicates) and ( i = PointerTrieDepth - 1) and | |
GetBusyIndicator(CurNode, BitFieldIndex) then | |
raise EPointerTrieDuplicate.Create(STR_DUPLICATESNOTALLOWED); | |
if i < PointerTrieDepth - 1 then | |
if not GetBusyIndicator(CurNode, BitFieldIndex) then | |
begin | |
ChildIndex := AddChild(PPointerTrieBranchNode(CurNode), i); | |
SetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex, ChildIndex); | |
SetBusyIndicator(CurNode, BitFieldIndex, True); | |
end | |
else ChildIndex := GetChildIndex(PPointerTrieBranchNode(CurNode), BitFieldIndex) | |
else | |
begin | |
SetBusyIndicator(CurNode, BitFieldIndex, True); | |
break; | |
end; | |
CurNode := NextNode(PPointerTrieBranchNode(CurNode), i, ChildIndex); | |
end; | |
inc(FCount); | |
end; | |
function TPointerTrie.Find(p: Pointer): Boolean; | |
var | |
DummyChildIndex : Byte; | |
DummyNode : PPointerTrieLeafNode; | |
begin | |
Result := InternalFind(p, DummyNode, DummyChildIndex); | |
end; | |
procedure TPointerTrie.Remove(p: Pointer); | |
var | |
ChildIndex : Byte; | |
Node : PPointerTrieLeafNode; | |
begin | |
if InternalFind(p, Node, ChildIndex) then | |
begin | |
SetBusyIndicator(@Node^.Base, ChildIndex, False); | |
dec(FCount); | |
end; | |
end; | |
procedure TPointerTrie.InitIterator(out AIterator: TPointerTrieIterator); | |
var | |
i : Byte; | |
begin | |
AIterator.AtEnd := False; | |
AIterator.Level := 0; | |
AIterator.LastResult := nil; | |
for i := 0 to PointerTrieDepth - 1 do | |
begin | |
AIterator.BreadCrumbs[i] := 0; | |
AIterator.ANodeStack[i] := nil; | |
end; | |
end; | |
function TPointerTrie.Next(var AIterator: TPointerTrieIterator): Pointer; | |
begin | |
if AIterator.AtEnd then | |
begin | |
Result := nil; | |
exit; | |
end; | |
if AIterator.Level = 0 then | |
begin | |
AIterator.ANodeStack[0] := @FRoot^.Base; | |
Result := nil; | |
end | |
else Result := AIterator.LastResult; | |
repeat | |
while AIterator.BreadCrumbs[AIterator.Level] < ChildrenPerBucket do | |
begin | |
if GetBusyIndicator(AIterator.ANodeStack[AIterator.Level], AIterator.BreadCrumbs[AIterator.Level] ) then | |
begin | |
Result := {%H-}Pointer({%H-}NativeInt(Result) or AIterator.BreadCrumbs[AIterator.Level]); | |
inc(AIterator.Level); | |
if AIterator.Level >= PointerTrieDepth then | |
begin | |
inc(AIterator.BreadCrumbs[AIterator.Level - 1]); | |
dec(AIterator.Level); | |
AIterator.LastResult := Result; | |
exit; | |
end; | |
Result := {%H-}Pointer({%H-}NativeInt(Result) shl BitsForChildIndexPerBucket); | |
AIterator.ANodeStack[AIterator.Level] := NextNode(PPointerTrieBranchNode(AIterator.ANodeStack[AIterator.Level - 1]), AIterator.Level - 1, | |
GetChildIndex(PPointerTrieBranchNode(AIterator.ANodeStack[AIterator.Level - 1]), | |
AIterator.BreadCrumbs[AIterator.Level - 1])); | |
break; | |
end | |
else inc(AIterator.BreadCrumbs[AIterator.Level] ); | |
end; | |
if AIterator.BreadCrumbs[AIterator.Level] >= ChildrenPerBucket then | |
begin | |
AIterator.BreadCrumbs[AIterator.Level] := 0; | |
dec(AIterator.Level); | |
if AIterator.Level >= 0 then | |
inc(AIterator.BreadCrumbs[AIterator.Level]) | |
else break; | |
Result := {%H-}Pointer({%H-}NativeInt(Result) shr BitsForChildIndexPerBucket); | |
Result := {%H-}Pointer({%H-}NativeInt(Result) and not NativeInt(BucketMask)); | |
end; | |
until False; | |
Result := nil; | |
AIterator.AtEnd := True; | |
end; | |
end. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Still working on it. Need to add iteration support and direct indexed access to make for easy replacement of TList to hold pointers.