Created
March 7, 2022 11:25
-
-
Save mjs3339/4a53426ff6780b165e52ef9a2f368cf8 to your computer and use it in GitHub Desktop.
BoyerMoore String Searching Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
public class BoyerMooreString | |
{ | |
private const int CMaxValue = char.MaxValue; | |
private int[] _jumpTable; | |
private string _pattern; | |
private int _patternLength; | |
public BoyerMooreString() | |
{ | |
} | |
public BoyerMooreString(string pattern) | |
{ | |
SetPattern(pattern); | |
} | |
public void SetPattern(string pattern) | |
{ | |
_pattern = pattern; | |
_jumpTable = new int[CMaxValue]; | |
_patternLength = _pattern.Length; | |
for (var index = 0; index < CMaxValue; index++) | |
_jumpTable[index] = _patternLength; | |
for (var index = 0; index < _patternLength - 1; index++) | |
_jumpTable[_pattern[index]] = _patternLength - index - 1; | |
} | |
public unsafe int Search(string searray, int startIndex = 0) | |
{ | |
if (_pattern == null) | |
throw new Exception("Pattern has not been set."); | |
if (_patternLength > searray.Length) | |
throw new Exception("Search Pattern length exceeds search array length."); | |
var index = startIndex; | |
var limit = searray.Length - _patternLength; | |
var patternLengthMinusOne = _patternLength - 1; | |
var Moves = 0; | |
fixed (char* pointerTocharArray = searray) | |
{ | |
var pointerTocharArrayStartingIndex = pointerTocharArray + startIndex; | |
fixed (char* pointerToPattern = _pattern) | |
{ | |
while (index <= limit) | |
{ | |
var j = patternLengthMinusOne; | |
while (j >= 0 && pointerToPattern[j] == pointerTocharArrayStartingIndex[index + j]) | |
j--; | |
if (j < 0) | |
return index; | |
index += _jumpTable[pointerTocharArrayStartingIndex[index + j]]; | |
Moves++; | |
} | |
} | |
} | |
return -1; | |
} | |
public unsafe (List<int>, int) SearchAll(string searray, int startIndex = 0) | |
{ | |
if (_pattern == null) | |
throw new Exception("Pattern has not been set."); | |
if (_patternLength > searray.Length) | |
throw new Exception("Search Pattern length exceeds search array length."); | |
var index = startIndex; | |
var limit = searray.Length - _patternLength; | |
var patternLengthMinusOne = _patternLength - 1; | |
var list = new List<int>(); | |
var Moves = 0; | |
fixed (char* pointerTocharArray = searray) | |
{ | |
var pointerTocharArrayStartingIndex = pointerTocharArray + startIndex; | |
fixed (char* pointerToPattern = _pattern) | |
{ | |
while (index <= limit) | |
{ | |
var j = patternLengthMinusOne; | |
while (j >= 0 && pointerToPattern[j] == pointerTocharArrayStartingIndex[index + j]) | |
j--; | |
if (j < 0) | |
list.Add(index); | |
index += _jumpTable[pointerTocharArrayStartingIndex[index + j]]; | |
Moves++; | |
} | |
} | |
} | |
return (list, Moves); | |
} | |
public int SuperSearch(string searray, int nth, int start = 0) | |
{ | |
var e = start; | |
var c = 0; | |
do | |
{ | |
e = Search(searray, e); | |
if (e == -1) | |
return -1; | |
c++; | |
e++; | |
} while (c < nth); | |
return e - 1; | |
} | |
public class BMPartialPatternSearchString | |
{ | |
public int MinimumSearchLength; | |
public BMPartialPatternSearchString(int min = 5) | |
{ | |
MinimumSearchLength = min; | |
} | |
public (string partialPattern, int idx) SearchPartial(string pattern, string searray) | |
{ | |
var bm = new BoyerMooreString(pattern); | |
var len = pattern.Length; | |
var tree = new List<string>(); | |
if (len < MinimumSearchLength) | |
throw new Exception("Search Pattern less than minimum search length."); | |
var offset = 0; | |
var wl = MinimumSearchLength; | |
do | |
{ | |
var tpat = pattern.Substring(offset, wl); | |
tree.Add(tpat); | |
bm.SetPattern(tpat); | |
var idx = bm.Search(searray); | |
if (idx != -1) | |
return (tpat, idx); | |
if (offset + wl >= len) | |
{ | |
offset = 0; | |
wl++; | |
continue; | |
} | |
offset++; | |
} while (wl != len + 1); | |
return (pattern, -1); | |
} | |
public List<(string partialPattern, int idx)> SearchPartialFirst(string pattern, string searray) | |
{ | |
var bm = new BoyerMooreString(pattern); | |
var len = pattern.Length; | |
var lst = new List<(string partialPattern, int idx)>(); | |
var tree = new List<string>(); | |
if (len < MinimumSearchLength) | |
throw new Exception("Search Pattern less than minimum search length."); | |
var offset = 0; | |
var wl = MinimumSearchLength; | |
do | |
{ | |
var tpat = pattern.Substring(offset, wl); | |
tree.Add(tpat); | |
bm.SetPattern(tpat); | |
var idx = bm.Search(searray); | |
if (idx != -1) | |
lst.Add((tpat, idx)); | |
if (offset + wl >= len) | |
{ | |
offset = 0; | |
wl++; | |
continue; | |
} | |
offset++; | |
} while (wl != len + 1); | |
return new List<(string partialPattern, int idx)> { (pattern, -1) }; | |
} | |
public List<(string partialPattern, int idx)> SearchPartialAll(string pattern, string searray) | |
{ | |
var bm = new BoyerMooreString(pattern); | |
var len = pattern.Length; | |
var lst = new List<(string partialPattern, int idx)>(); | |
var tree = new List<string>(); | |
if (len < MinimumSearchLength) | |
throw new Exception("Search Pattern less than minimum search length."); | |
var offset = 0; | |
var wl = MinimumSearchLength; | |
do | |
{ | |
var tpat = pattern.Substring(offset, wl); | |
tree.Add(tpat); | |
bm.SetPattern(tpat); | |
var idxl = bm.SearchAll(searray); | |
if (idxl.Item1.Count > 0) | |
foreach (var idx in idxl.Item1) | |
lst.Add((tpat, idx)); | |
if (offset + wl >= len) | |
{ | |
offset = 0; | |
wl++; | |
continue; | |
} | |
offset++; | |
} while (wl != len + 1); | |
return lst; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment