Last active
January 27, 2024 21:27
-
-
Save ArtemAvramenko/175deb252352fcf19f3c3ddd2498415d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// In practical scenarios, such a transformation speeds up the search by dozens of times: | |
// Apple, Apricot, Banana => (?:Ap(?:ple|ricot)|Banana) | |
private static void GenerateRegex(StringBuilder result, IList<string> words, int level = 0) | |
{ | |
const int minPrefix = 1; | |
const int maxLevel = 300; | |
void AppendLevel() | |
{ | |
// sb.Append('\n'); | |
// sb.Append(new string(' ', level * 4)); | |
} | |
if (words.Count == 1) | |
{ | |
AppendLevel(); | |
result.Append(Regex.Escape(words[0])); | |
return; | |
} | |
var prefixes = new Dictionary<string, List<string>>(); | |
foreach (var word in words) | |
{ | |
var n = level == maxLevel ? word.Length : Math.Min(word.Length, minPrefix); | |
var prefix = word[..n]; | |
var suffix = word[n..]; | |
if (!prefixes.TryGetValue(prefix, out var suffixes)) | |
{ | |
suffixes = new(); | |
prefixes[prefix] = suffixes; | |
} | |
if (suffix.Length > 0) | |
{ | |
suffixes.Add(suffix); | |
} | |
} | |
var isMulti = prefixes.Count > 1; | |
if (isMulti) | |
{ | |
AppendLevel(); | |
result.Append("(?:"); | |
} | |
foreach (var prefix in prefixes) | |
{ | |
AppendLevel(); | |
result.Append(Regex.Escape(prefix.Key)); | |
GenerateRegex(result, prefix.Value, level + 1); | |
if (isMulti) | |
{ | |
AppendLevel(); | |
result.Append('|'); | |
} | |
} | |
if (isMulti) | |
{ | |
result[^1] = ')'; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment