Skip to content

Instantly share code, notes, and snippets.

@ArtemAvramenko
Last active January 27, 2024 21:27
Show Gist options
  • Save ArtemAvramenko/175deb252352fcf19f3c3ddd2498415d to your computer and use it in GitHub Desktop.
Save ArtemAvramenko/175deb252352fcf19f3c3ddd2498415d to your computer and use it in GitHub Desktop.
// In practical scenarios, such a transformation speeds up the search by dozens of times:
// Apple, Apricot, Banana => (?:Ap(?:ple|ricot)|Banana)
private static void GenerateRegex(StringBuilder result, IList<string> words, int level = 0)
{
const int minPrefix = 1;
const int maxLevel = 300;
void AppendLevel()
{
// sb.Append('\n');
// sb.Append(new string(' ', level * 4));
}
if (words.Count == 1)
{
AppendLevel();
result.Append(Regex.Escape(words[0]));
return;
}
var prefixes = new Dictionary<string, List<string>>();
foreach (var word in words)
{
var n = level == maxLevel ? word.Length : Math.Min(word.Length, minPrefix);
var prefix = word[..n];
var suffix = word[n..];
if (!prefixes.TryGetValue(prefix, out var suffixes))
{
suffixes = new();
prefixes[prefix] = suffixes;
}
if (suffix.Length > 0)
{
suffixes.Add(suffix);
}
}
var isMulti = prefixes.Count > 1;
if (isMulti)
{
AppendLevel();
result.Append("(?:");
}
foreach (var prefix in prefixes)
{
AppendLevel();
result.Append(Regex.Escape(prefix.Key));
GenerateRegex(result, prefix.Value, level + 1);
if (isMulti)
{
AppendLevel();
result.Append('|');
}
}
if (isMulti)
{
result[^1] = ')';
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment