Skip to content

Instantly share code, notes, and snippets.

@mjs3339
Last active August 5, 2024 02:57
Show Gist options
  • Save mjs3339/0772431281093f1bca1fce2f2eca527d to your computer and use it in GitHub Desktop.
Save mjs3339/0772431281093f1bca1fce2f2eca527d to your computer and use it in GitHub Desktop.
C# High Performance Boyer Moore Byte Array Search Algorithm
public class BoyerMoore
{
private int[] _jumpTable;
private byte[] _pattern;
private int _patternLength;
public BoyerMoore()
{
}
public BoyerMoore(byte[] pattern)
{
_pattern = pattern;
_jumpTable = new int[256];
_patternLength = _pattern.Length;
for(var index = 0; index < 256; index++)
_jumpTable[index] = _patternLength;
for(var index = 0; index < _patternLength - 1; index++)
_jumpTable[_pattern[index]] = _patternLength - index - 1;
}
public void SetPattern(byte[] pattern)
{
_pattern = pattern;
_jumpTable = new int[256];
_patternLength = _pattern.Length;
for(var index = 0; index < 256; index++)
_jumpTable[index] = _patternLength;
for(var index = 0; index < _patternLength - 1; index++)
_jumpTable[_pattern[index]] = _patternLength - index - 1;
}
public unsafe int Search(byte[] searchArray, int startIndex = 0)
{
if(_pattern == null)
throw new Exception("Pattern has not been set.");
if(_patternLength > searchArray.Length)
throw new Exception("Search Pattern length exceeds search array length.");
var index = startIndex;
var limit = searchArray.Length - _patternLength;
var patternLengthMinusOne = _patternLength - 1;
fixed(byte* pointerToByteArray = searchArray)
{
var pointerToByteArrayStartingIndex = pointerToByteArray + startIndex;
fixed(byte* pointerToPattern = _pattern)
{
while(index <= limit)
{
var j = patternLengthMinusOne;
while(j >= 0 && pointerToPattern[j] == pointerToByteArrayStartingIndex[index + j])
j--;
if(j < 0)
return index;
index += Math.Max(_jumpTable[pointerToByteArrayStartingIndex[index + j]] - _patternLength + 1 + j, 1);
}
}
}
return-1;
}
public unsafe List<int> SearchAll(byte[] searchArray, int startIndex = 0)
{
var index = startIndex;
var limit = searchArray.Length - _patternLength;
var patternLengthMinusOne = _patternLength - 1;
var list = new List<int>();
fixed(byte* pointerToByteArray = searchArray)
{
var pointerToByteArrayStartingIndex = pointerToByteArray + startIndex;
fixed(byte* pointerToPattern = _pattern)
{
while(index <= limit)
{
var j = patternLengthMinusOne;
while(j >= 0 && pointerToPattern[j] == pointerToByteArrayStartingIndex[index + j])
j--;
if(j < 0)
list.Add(index);
index += Math.Max(_jumpTable[pointerToByteArrayStartingIndex[index + j]] - _patternLength + 1 + j, 1);
}
}
}
return list;
}
public int SuperSearch(byte[] searchArray, int nth, int start = 0)
{
var e = start;
var c = 0;
do
{
e = Search(searchArray, e);
if(e == -1)
return-1;
c++;
e++;
} while(c < nth);
return e - 1;
}
}
@cliveontoast
Copy link

Works like a charm.

@pedoc
Copy link

pedoc commented Oct 25, 2018

it's working,but there are some problems.example
source:
Offset 0 1 2 3 4 5 6 7 8 9 A B C D E F
00000000 00 00 01 BA 5C 3B A5 2D B4 01 00 FA 07 FE FF FF 篭;?? ??
00000010 00 00 00 37 00 00 01 E0 13 FA 8C 80 07 27 0E E9 7 ?鷮€ ' ?
00000020 4B 6D FF FD 00 00 00 01 61 E0 20 97 FF 00 10 BC Km? a?? ?
00000030 7B 33 22 4C 6B B9 9F 8F 52 FC A9 0C 6C 6B 99 70 {3"Lk篃 R lk檖
00000040 94 69 D7 55 7C 0E 30 BB 59 81 43 96 1B F7 F8 78 攊譛| 0籝 C?鼬x
00000050 94 78 DC E2 40 5D 18 5B A1 2E 88 33 C9 E6 42 67 攛茆@] [??涉Bg
00000060 7C 41 5D 11 37 0E 60 A0 3A 04 03 82 BF AD FF EC |A] 7 `? 偪??

pattern:
00 00 01 BA

Search result it's not eq 0

@pedoc
Copy link

pedoc commented Oct 25, 2018

work fine,it's my fault,sorry

@tigros
Copy link

tigros commented Jan 12, 2020

Is it common knowledge that these algorithms aren't thorough? It should be clear somewhere that they won't always find all occurrences! I just wasted some time! Interesting but not accurate.

@SiriusED
Copy link

SiriusED commented May 15, 2024

Hi, Is this possible to search backwards using this algorithm? Like to perform Find last occurrence feature?
I tried to just reverse loop and indexes in the SearchAll method but seems it does not work... It works only if I shift index always to index -= 1
Here is what I tried:

index = limit;
var k = patternLengthMinusOne;
while(index >= 0)
{
	long j = 0;
	while(j <= k && pointerToPattern[j] == pointerToByteArrayStartingIndex[index + j])
		j++;
	if(j > k)
		list.Add(index);
	index -= 1;
}

How to make it use _jumpTable with "backward" searching?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment