Skip to content

Instantly share code, notes, and snippets.

@mjs3339
Created March 7, 2022 11:25
Show Gist options
  • Save mjs3339/4a53426ff6780b165e52ef9a2f368cf8 to your computer and use it in GitHub Desktop.
Save mjs3339/4a53426ff6780b165e52ef9a2f368cf8 to your computer and use it in GitHub Desktop.
BoyerMoore String Searching Algorithm
using System;
using System.Collections.Generic;
public class BoyerMooreString
{
private const int CMaxValue = char.MaxValue;
private int[] _jumpTable;
private string _pattern;
private int _patternLength;
public BoyerMooreString()
{
}
public BoyerMooreString(string pattern)
{
SetPattern(pattern);
}
public void SetPattern(string pattern)
{
_pattern = pattern;
_jumpTable = new int[CMaxValue];
_patternLength = _pattern.Length;
for (var index = 0; index < CMaxValue; index++)
_jumpTable[index] = _patternLength;
for (var index = 0; index < _patternLength - 1; index++)
_jumpTable[_pattern[index]] = _patternLength - index - 1;
}
public unsafe int Search(string searray, int startIndex = 0)
{
if (_pattern == null)
throw new Exception("Pattern has not been set.");
if (_patternLength > searray.Length)
throw new Exception("Search Pattern length exceeds search array length.");
var index = startIndex;
var limit = searray.Length - _patternLength;
var patternLengthMinusOne = _patternLength - 1;
var Moves = 0;
fixed (char* pointerTocharArray = searray)
{
var pointerTocharArrayStartingIndex = pointerTocharArray + startIndex;
fixed (char* pointerToPattern = _pattern)
{
while (index <= limit)
{
var j = patternLengthMinusOne;
while (j >= 0 && pointerToPattern[j] == pointerTocharArrayStartingIndex[index + j])
j--;
if (j < 0)
return index;
index += _jumpTable[pointerTocharArrayStartingIndex[index + j]];
Moves++;
}
}
}
return -1;
}
public unsafe (List<int>, int) SearchAll(string searray, int startIndex = 0)
{
if (_pattern == null)
throw new Exception("Pattern has not been set.");
if (_patternLength > searray.Length)
throw new Exception("Search Pattern length exceeds search array length.");
var index = startIndex;
var limit = searray.Length - _patternLength;
var patternLengthMinusOne = _patternLength - 1;
var list = new List<int>();
var Moves = 0;
fixed (char* pointerTocharArray = searray)
{
var pointerTocharArrayStartingIndex = pointerTocharArray + startIndex;
fixed (char* pointerToPattern = _pattern)
{
while (index <= limit)
{
var j = patternLengthMinusOne;
while (j >= 0 && pointerToPattern[j] == pointerTocharArrayStartingIndex[index + j])
j--;
if (j < 0)
list.Add(index);
index += _jumpTable[pointerTocharArrayStartingIndex[index + j]];
Moves++;
}
}
}
return (list, Moves);
}
public int SuperSearch(string searray, int nth, int start = 0)
{
var e = start;
var c = 0;
do
{
e = Search(searray, e);
if (e == -1)
return -1;
c++;
e++;
} while (c < nth);
return e - 1;
}
public class BMPartialPatternSearchString
{
public int MinimumSearchLength;
public BMPartialPatternSearchString(int min = 5)
{
MinimumSearchLength = min;
}
public (string partialPattern, int idx) SearchPartial(string pattern, string searray)
{
var bm = new BoyerMooreString(pattern);
var len = pattern.Length;
var tree = new List<string>();
if (len < MinimumSearchLength)
throw new Exception("Search Pattern less than minimum search length.");
var offset = 0;
var wl = MinimumSearchLength;
do
{
var tpat = pattern.Substring(offset, wl);
tree.Add(tpat);
bm.SetPattern(tpat);
var idx = bm.Search(searray);
if (idx != -1)
return (tpat, idx);
if (offset + wl >= len)
{
offset = 0;
wl++;
continue;
}
offset++;
} while (wl != len + 1);
return (pattern, -1);
}
public List<(string partialPattern, int idx)> SearchPartialFirst(string pattern, string searray)
{
var bm = new BoyerMooreString(pattern);
var len = pattern.Length;
var lst = new List<(string partialPattern, int idx)>();
var tree = new List<string>();
if (len < MinimumSearchLength)
throw new Exception("Search Pattern less than minimum search length.");
var offset = 0;
var wl = MinimumSearchLength;
do
{
var tpat = pattern.Substring(offset, wl);
tree.Add(tpat);
bm.SetPattern(tpat);
var idx = bm.Search(searray);
if (idx != -1)
lst.Add((tpat, idx));
if (offset + wl >= len)
{
offset = 0;
wl++;
continue;
}
offset++;
} while (wl != len + 1);
return new List<(string partialPattern, int idx)> { (pattern, -1) };
}
public List<(string partialPattern, int idx)> SearchPartialAll(string pattern, string searray)
{
var bm = new BoyerMooreString(pattern);
var len = pattern.Length;
var lst = new List<(string partialPattern, int idx)>();
var tree = new List<string>();
if (len < MinimumSearchLength)
throw new Exception("Search Pattern less than minimum search length.");
var offset = 0;
var wl = MinimumSearchLength;
do
{
var tpat = pattern.Substring(offset, wl);
tree.Add(tpat);
bm.SetPattern(tpat);
var idxl = bm.SearchAll(searray);
if (idxl.Item1.Count > 0)
foreach (var idx in idxl.Item1)
lst.Add((tpat, idx));
if (offset + wl >= len)
{
offset = 0;
wl++;
continue;
}
offset++;
} while (wl != len + 1);
return lst;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment