Skip to content

Instantly share code, notes, and snippets.

@elbakramer
Created April 27, 2018 09:58
Show Gist options
  • Save elbakramer/ea87619d5313eb2a6f7b534e06c73a3d to your computer and use it in GitHub Desktop.
Save elbakramer/ea87619d5313eb2a6f7b534e06c73a3d to your computer and use it in GitHub Desktop.
naive implementation of ukkonen's algorithm
import scala.collection.mutable.HashMap
object SuffixTree {
class State() {
val transitions: HashMap[Char,Transition] = HashMap.empty[Char,Transition]
var suffixLink: Option[State] = None
def printDebugString(prefix: String, label: String): Unit = {
val kvpairs = transitions.toSeq.sortBy(_._1)
val labels = kvpairs.map(t => t._2).map(t => t.label).toArray.toSeq
println(prefix, label, "[" + labels.mkString(", ") + "]")
for ((k,t) <- kvpairs) {
t.to.printDebugString(k.toString, t.label)
}
}
}
class Transition(val string: String, val from: State, val start: Int, val end: Int, val to: State) {
def label: String = string.slice(start - 1, end)
}
def ukkonensAlgorithm(): State = {
val alphabet = "abcdefghijklmnopqrstuvwxyz"
val string = "abcabxabcd#"
def inf = string.length
def T = string
def t_(i: Int): Char = {
if (i > 0) {
string(i - 1)
} else {
alphabet(- i - 1)
}
}
def m = alphabet.length
// line 1
val root = new State()
val perp = new State()
def createTransition(from: State, startAndEnd: (Int, Int), to: State): Unit = {
val start = startAndEnd._1
val end = startAndEnd._2
val transition = new Transition(string, from, start, end, to)
from.transitions.put(t_(start), transition)
}
def createSuffixLink(arg: State, res: State): Unit = {
arg.suffixLink = Some(res)
}
def findTransition(t: Char, from: State): Option[Transition] = {
from.transitions.get(t)
}
def update(state: State, ki: (Int, Int)): (State, Int) = {
var s = state
var k = ki._1
var i = ki._2
var sk = (s, k)
var oldr = root
var endPointAndR = testAndSplit(s, (k, i - 1), t_(i))
var endPoint = endPointAndR._1
var r = endPointAndR._2
var rprime = new State()
while (!endPoint) {
createTransition(r, (i, inf), rprime)
if (oldr != root) {
createSuffixLink(oldr, r)
}
oldr = r
sk = canonize(s.suffixLink.get, (k, i - 1))
s = sk._1
k = sk._2
endPointAndR = testAndSplit(s, (k, i - 1), t_(i))
endPoint = endPointAndR._1
r = endPointAndR._2
}
if (oldr != root) {
createSuffixLink(oldr, s)
}
(s, k)
}
def testAndSplit(state: State, kp: (Int, Int), t: Char): (Boolean, State) = {
var s = state
var k = kp._1
var p = kp._2
if (k <= p) {
var gprime = findTransition(t_(k), s).get
var pprime = gprime.end
var kprime = gprime.start
var sprime = gprime.to
if (t == t_(kprime + p - k + 1)) {
(true, s)
} else {
val r = new State()
createTransition(r, (kprime + p - k + 1, pprime), sprime)
createTransition(s, (kprime, kprime + p - k), r)
(false, r)
}
} else {
val trans = findTransition(t, s)
if (!trans.isDefined) {
(false, s)
} else {
(true, s)
}
}
}
def canonize(state: State, kp: (Int, Int)): (State, Int) = {
var s = state
var k = kp._1
var p = kp._2
if (p < k) {
(s, k)
} else {
var gprime = findTransition(t_(k), s).get
var pprime = gprime.end
var kprime = gprime.start
var sprime = gprime.to
while ((pprime - kprime) <= (p - k)) {
k = k + pprime - kprime + 1
s = sprime
if (k <= p) {
gprime = findTransition(t_(k), s).get
pprime = gprime.end
kprime = gprime.start
sprime = gprime.to
}
}
(s, k)
}
}
for (j <- 1 to m) {
createTransition(perp, (-j, -j), root)
}
createSuffixLink(root, perp)
var s = root
var k = 1
var i = 0
var sk = (s, k)
while (t_(i + 1) != '#') {
i = i + 1
sk = update(s, (k, i))
s = sk._1
k = sk._2
sk = canonize(s, (k, i))
s = sk._1
k = sk._2
}
root
}
def main(args: Array[String]): Unit = {
val root = ukkonensAlgorithm()
root.printDebugString("none", "none")
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment