Created
August 5, 2020 23:06
-
-
Save eocron/ef5c2b85115c9bdff43dd4413b39be6e to your computer and use it in GitHub Desktop.
Suffix automata implementation and few algorithms from https://cp-algorithms.com/string/suffix-automaton.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections; | |
using System.Collections.Generic; | |
using System.Linq; | |
namespace ConsoleApp1 | |
{ | |
public interface ISuffixAutomataState<T> | |
{ | |
bool IsTerminal { get; } | |
int Length { get; } | |
ISuffixAutomataState<T> Link { get; } | |
ISuffixAutomataState<T> Move(T next); | |
IEnumerable<Tuple<T, ISuffixAutomataState<T>>> GetAllNexts(); | |
} | |
public interface ISuffixAutomata<T> | |
{ | |
ISuffixAutomataState<T> First(); | |
public void Extend(IEnumerable<T> items); | |
public void Extend(T item); | |
} | |
public static class SuffixAutomataExtensions | |
{ | |
public static bool Contains<T>(this ISuffixAutomata<T> fsa, IEnumerable<T> subSet) | |
{ | |
var fail = false; | |
var iter = fsa.First(); | |
foreach (var c in subSet) | |
{ | |
iter = iter.Move(c); | |
if (iter == null) | |
{ | |
fail = true; | |
break; | |
} | |
} | |
return !fail; | |
} | |
public static bool EndsWith<T>(this ISuffixAutomata<T> fsa, IEnumerable<T> subSet) | |
{ | |
var fail = false; | |
var iter = fsa.First(); | |
foreach (var c in subSet) | |
{ | |
iter = iter.Move(c); | |
if (iter == null) | |
{ | |
fail = true; | |
break; | |
} | |
} | |
return !fail && iter.IsTerminal; | |
} | |
public static void LongestCommonSubArray<T>(this ISuffixAutomata<T> fsa, IEnumerable<T> subSet, out int offset, out int count) | |
{ | |
int l = 0, best = 0, bestpos = 0, i = 0; | |
var iter = fsa.First(); | |
foreach (var t in subSet) | |
{ | |
while (iter != null && iter.Move(t) == null) | |
{ | |
iter = iter.Link; | |
l = iter.Length; | |
} | |
var tmp = iter.Move(t); | |
if (tmp != null) | |
{ | |
iter = tmp; | |
l++; | |
} | |
if (l > best) | |
{ | |
best = l; | |
bestpos = i; | |
} | |
i++; | |
} | |
offset = bestpos - best + 1; | |
count = best; | |
} | |
} | |
public sealed class SuffixAutomata<T> : ISuffixAutomata<T> | |
{ | |
private sealed class SuffixMap<V> : IEnumerable<KeyValuePair<T, V>> | |
{ | |
private readonly SortedDictionary<T, V> _sd; | |
private readonly Dictionary<T, V> _d; | |
public SuffixMap(object comparer) | |
{ | |
var eq = comparer as IEqualityComparer<T>; | |
var cmp = comparer as IComparer<T>; | |
if (cmp != null) | |
{ | |
_sd = new SortedDictionary<T, V>(cmp); | |
} | |
else if (eq != null) | |
{ | |
_d = new Dictionary<T, V>(eq); | |
} | |
} | |
public SuffixMap(SuffixMap<V> next) | |
{ | |
_sd = next._sd == null ? null : new SortedDictionary<T, V>(next._sd, next._sd.Comparer); | |
_d = next._d == null ? null : new Dictionary<T, V>(next._d, next._d.Comparer); | |
} | |
public IEnumerator<KeyValuePair<T, V>> GetEnumerator() | |
{ | |
return (IEnumerator<KeyValuePair<T, V>>)_sd?.GetEnumerator() ?? _d.GetEnumerator(); | |
} | |
internal bool ContainsKey(T c) | |
{ | |
if (_sd != null) | |
return _sd.ContainsKey(c); | |
return _d.ContainsKey(c); | |
} | |
internal bool TryGetValue(T key, out V value) | |
{ | |
if (_sd != null) | |
return _sd.TryGetValue(key, out value); | |
return _d.TryGetValue(key, out value); | |
} | |
IEnumerator IEnumerable.GetEnumerator() | |
{ | |
return GetEnumerator(); | |
} | |
internal void Set(T key, V value) | |
{ | |
if (_sd != null) | |
_sd[key] = value; | |
else | |
_d[key] = value; | |
} | |
internal V Get(T key) | |
{ | |
if (_sd != null) | |
return _sd[key]; | |
return _d[key]; | |
} | |
} | |
private sealed class SuffixAutomataState : ISuffixAutomataState<T> | |
{ | |
public int Length { get; set; } | |
public int link { get; set; } | |
public ISuffixAutomataState<T> Link => link >= 0 ? _states[link] : null; | |
public bool IsTerminal { get; set; } | |
public SuffixMap<int> next { get; set; } | |
private readonly List<SuffixAutomataState> _states; | |
internal SuffixAutomataState(List<SuffixAutomataState> states) { _states = states; } | |
public ISuffixAutomataState<T> Move(T item) | |
{ | |
int nextItemId; | |
if (next.TryGetValue(item, out nextItemId)) | |
{ | |
return _states[nextItemId]; | |
} | |
return null; | |
} | |
public IEnumerable<Tuple<T, ISuffixAutomataState<T>>> GetAllNexts() | |
{ | |
foreach(var v in next) | |
{ | |
yield return Tuple.Create(v.Key, (ISuffixAutomataState<T>)_states[v.Value]); | |
} | |
} | |
} | |
private readonly List<SuffixAutomataState> _states; | |
private readonly object _comparer; | |
private int _last; | |
public SuffixAutomata(int expectedLength = 0, object comparer = null) | |
{ | |
if (comparer != null && !(comparer is IEqualityComparer<T> || comparer is IComparer<T>)) | |
throw new ArgumentException("Invalid comparer type."); | |
_comparer = comparer ?? EqualityComparer<T>.Default; | |
_states = new List<SuffixAutomataState>(expectedLength * 2); | |
_states.Add(new SuffixAutomataState(_states) | |
{ | |
Length = 0, | |
link = -1, | |
next = new SuffixMap<int>(_comparer) | |
}); | |
_last = 0; | |
} | |
public void Extend(IEnumerable<T> elements) | |
{ | |
if (elements == null) | |
return; | |
foreach(var e in elements) | |
{ | |
InternalExtend(e); | |
} | |
RecalculateTerminals(); | |
} | |
public void Extend(T c) | |
{ | |
InternalExtend(c); | |
RecalculateTerminals(); | |
} | |
private void InternalExtend(T c) | |
{ | |
int cur = _states.Count; | |
_states.Add(new SuffixAutomataState(_states) | |
{ | |
Length = _states[_last].Length + 1, | |
link = -1, | |
next = new SuffixMap<int>(_comparer) | |
}); | |
int p = _last; | |
while (p != -1 && !_states[p].next.ContainsKey(c)) | |
{ | |
_states[p].next.Set(c, cur); | |
p = _states[p].link; | |
} | |
if (p == -1) | |
{ | |
_states[cur].link = 0; | |
} | |
else | |
{ | |
int q = _states[p].next.Get(c); | |
if (_states[p].Length + 1 == _states[q].Length) | |
{ | |
_states[cur].link = q; | |
} | |
else | |
{ | |
int clone = _states.Count; | |
_states.Add(new SuffixAutomataState(_states) | |
{ | |
Length = _states[p].Length + 1, | |
next = new SuffixMap<int>(_states[q].next), | |
link = _states[q].link, | |
}); | |
while (p != -1 && _states[p].next.Get(c) == q) | |
{ | |
_states[p].next.Set(c, clone); | |
p = _states[p].link; | |
} | |
_states[q].link = _states[cur].link = clone; | |
} | |
} | |
_last = cur; | |
} | |
private void RecalculateTerminals() | |
{ | |
foreach (var s in _states) | |
s.IsTerminal = false; | |
var cur = _last; | |
while(cur > 0) | |
{ | |
_states[cur].IsTerminal = true; | |
cur = _states[cur].link; | |
} | |
} | |
public ISuffixAutomataState<T> First() | |
{ | |
return _states.First(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment