Skip to content

Instantly share code, notes, and snippets.

@kanru
Created October 29, 2022 07:47
Show Gist options
  • Save kanru/8d7b3a7cc00860c9b8b00230e197287f to your computer and use it in GitHub Desktop.
Save kanru/8d7b3a7cc00860c9b8b00230e197287f to your computer and use it in GitHub Desktop.
Suffix Tree
/*
* Copyright (c) 2005 Kanru Chen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
using System;
using System.IO;
using System.Text;
using Nemerle.Utility;
namespace KC {
type StringPair = int * int;
type TransitionPair = StringPair * State;
public class State
{
class TransitionNode {
public this (k: char, v: TransitionPair) {
Key = k;
Value = v;
}
public mutable Key : char;
public mutable Value : TransitionPair;
public mutable Next : TransitionNode = null;
}
mutable transitions : TransitionNode;
mutable count = 0;
[Accessor (flags=WantSetter)]
mutable s_link : State;
public Item[a: char] : TransitionPair
{
get {
def ret (t) {
match (t) {
| null =>
((0, 0), null);
| _ when t.Key == a =>
t.Value;
| _ =>
ret (t.Next);
}
}
ret (transitions);
}
set {
ret : {
if (transitions == null) {
transitions = TransitionNode (a, value);
count++;
} else {
mutable last : TransitionNode = null;
for (mutable i = transitions; i != null; i = i.Next) {
when (i.Key == a) {
i.Value = value;
ret ();
}
last = i;
}
last.Next = TransitionNode (a, value);
count++;
}
}
}
}
public Has (a: char) : bool
{
def ret (t) {
match (t) {
| null =>
false;
| _ when t.Key == a =>
true;
| _ =>
ret (t.Next);
}
}
ret (transitions);
}
public Transitions : array [char]
{
get {
if (transitions == null)
null;
else {
def a = array (count);
def build(i, t) {
unless (t == null) {
a[i] = t.Key;
build(i+1, t.Next);
}
}
build(0, transitions);
a;
}
}
}
}
public class SuffixTree
{
inf = Int32.MaxValue;
[Accessor]
text : StringBuilder = StringBuilder ();
root : State = State ();
bottom : State = State ();
mutable i : int;
mutable k : int;
mutable working : State;
public this (input: string)
{
init ();
def do_build () {
unless (i >= input.Length) {
Append (input[i]);
do_build ();
}
}
do_build ();
}
public this ()
{
init ();
}
init () : void
{
root.SLink = bottom;
working = root;
k = 0; i = 0;
}
public Append (t: char) : void
{
_ = text.Append (t);
bottom[text[i]] = ((i, i), root);
def (s', k') = UpDate (working, k, i);
def (s', k') = Canonize (s', k', i);
working = s';
k = k';
i = i + 1;
}
UpDate (s: State, k: int, i: int) : State * int
{
def do_update (o, s', k') {
def (is_end_point, r) = TestAndSplit (s', k', i-1, i);
if (is_end_point) {
unless (o == root: object)
o.SLink = s';
(s', k');
} else {
r[text[i]] = ((i, inf), State());
unless (o == root: object)
o.SLink = r;
def (s', k') = Canonize (s'.SLink, k', i-1);
do_update (r, s', k');
}
}
do_update (root, s, k);
}
TestAndSplit (s: State, k: int, p: int, t: int) : bool * State
{
if (k <= p) {
def ((k', p'), s') = s[text[k]];
if (text[t] == text[k' + p - k + 1])
(true, s);
else {
def r = State ();
s[text[k]] = ((k', k'+p-k), r);
r[text[k'+p-k+1]] = ((k'+p-k+1, p'), s');
(false, r);
}
} else {
(s.Has(text[t]), s);
}
}
Canonize (mutable s: State, mutable k: int, p: int) : State * int
{
if (p < k)
(s, k);
else {
mutable s' :State = null;
mutable k' = 0;
mutable p' = 0;
((k', p'), s') = s[text[k]];
while (p'-k' <= p-k) {
k += p'-k' + 1;
s = s';
when (k <= p) {
((k', p'), s') = s[text[k]];
}
}
(s, k);
}
}
ShowTree (s: State, prefix: string) : void
{
def trans = s.Transitions;
def text = text.ToString ();
when (trans != null) {
for (mutable i = 0; i < trans.Length; i++) {
def ((k, p), s') = s[trans[i]];
def p = if (p == inf) text.Length - k else p - k + 1;
match (i) {
| x when x == trans.Length-1 =>
Console.WriteLine ("{0}`-- {1}", prefix, text.Substring(k, p));
ShowTree (s', prefix + " ");
| _ =>
Console.WriteLine ("{0}|-- {1}", prefix, text.Substring(k, p));
ShowTree (s', prefix + "| ");
}
}
}
}
public PrintTree () : void
{
Console.WriteLine (".");
ShowTree (root, "");
}
CountStates (s: State) : int
{
def trans = s.Transitions;
match (trans) {
| x when x != null =>
mutable count = trans.Length;
for (mutable i = 0; i < trans.Length; i++) {
def ((_, _), s') = s[trans[i]];
count += CountStates (s');
}
count;
| _ => -1;
}
}
public PrintStats () : void
{
Console.WriteLine ("Have {0} internal nodes", CountStates (root) + 1);
}
}
public module T {
public Main (args: array [string]) : void
{
match (args.Length) {
| x when (x >= 2 && args[0] == "-f") =>
def reader = StreamReader (args[1], Text.ASCIIEncoding ());
def st = SuffixTree ();
def do_build () {
def t = reader.Read ();
when (t != -1) {
def x = t :> char;
when ( (x <= 'z' && x >= 'a') || (x <= 'Z' && x >= 'A') || (x <= '9' && x >= '0') ) {
st.Append (x);
}
do_build ();
}
}
do_build ();
st.Append ('#');
st.PrintStats ();
| x when x == 1 =>
def st = SuffixTree (args[0] + "#");
st.PrintTree ();
st.PrintStats ();
| _ =>
Console.WriteLine ("Usage: suffix [string|-f filename]");
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment