Created
April 27, 2018 09:58
-
-
Save elbakramer/ea87619d5313eb2a6f7b534e06c73a3d to your computer and use it in GitHub Desktop.
naive implementation of ukkonen's algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable.HashMap | |
object SuffixTree { | |
class State() { | |
val transitions: HashMap[Char,Transition] = HashMap.empty[Char,Transition] | |
var suffixLink: Option[State] = None | |
def printDebugString(prefix: String, label: String): Unit = { | |
val kvpairs = transitions.toSeq.sortBy(_._1) | |
val labels = kvpairs.map(t => t._2).map(t => t.label).toArray.toSeq | |
println(prefix, label, "[" + labels.mkString(", ") + "]") | |
for ((k,t) <- kvpairs) { | |
t.to.printDebugString(k.toString, t.label) | |
} | |
} | |
} | |
class Transition(val string: String, val from: State, val start: Int, val end: Int, val to: State) { | |
def label: String = string.slice(start - 1, end) | |
} | |
def ukkonensAlgorithm(): State = { | |
val alphabet = "abcdefghijklmnopqrstuvwxyz" | |
val string = "abcabxabcd#" | |
def inf = string.length | |
def T = string | |
def t_(i: Int): Char = { | |
if (i > 0) { | |
string(i - 1) | |
} else { | |
alphabet(- i - 1) | |
} | |
} | |
def m = alphabet.length | |
// line 1 | |
val root = new State() | |
val perp = new State() | |
def createTransition(from: State, startAndEnd: (Int, Int), to: State): Unit = { | |
val start = startAndEnd._1 | |
val end = startAndEnd._2 | |
val transition = new Transition(string, from, start, end, to) | |
from.transitions.put(t_(start), transition) | |
} | |
def createSuffixLink(arg: State, res: State): Unit = { | |
arg.suffixLink = Some(res) | |
} | |
def findTransition(t: Char, from: State): Option[Transition] = { | |
from.transitions.get(t) | |
} | |
def update(state: State, ki: (Int, Int)): (State, Int) = { | |
var s = state | |
var k = ki._1 | |
var i = ki._2 | |
var sk = (s, k) | |
var oldr = root | |
var endPointAndR = testAndSplit(s, (k, i - 1), t_(i)) | |
var endPoint = endPointAndR._1 | |
var r = endPointAndR._2 | |
var rprime = new State() | |
while (!endPoint) { | |
createTransition(r, (i, inf), rprime) | |
if (oldr != root) { | |
createSuffixLink(oldr, r) | |
} | |
oldr = r | |
sk = canonize(s.suffixLink.get, (k, i - 1)) | |
s = sk._1 | |
k = sk._2 | |
endPointAndR = testAndSplit(s, (k, i - 1), t_(i)) | |
endPoint = endPointAndR._1 | |
r = endPointAndR._2 | |
} | |
if (oldr != root) { | |
createSuffixLink(oldr, s) | |
} | |
(s, k) | |
} | |
def testAndSplit(state: State, kp: (Int, Int), t: Char): (Boolean, State) = { | |
var s = state | |
var k = kp._1 | |
var p = kp._2 | |
if (k <= p) { | |
var gprime = findTransition(t_(k), s).get | |
var pprime = gprime.end | |
var kprime = gprime.start | |
var sprime = gprime.to | |
if (t == t_(kprime + p - k + 1)) { | |
(true, s) | |
} else { | |
val r = new State() | |
createTransition(r, (kprime + p - k + 1, pprime), sprime) | |
createTransition(s, (kprime, kprime + p - k), r) | |
(false, r) | |
} | |
} else { | |
val trans = findTransition(t, s) | |
if (!trans.isDefined) { | |
(false, s) | |
} else { | |
(true, s) | |
} | |
} | |
} | |
def canonize(state: State, kp: (Int, Int)): (State, Int) = { | |
var s = state | |
var k = kp._1 | |
var p = kp._2 | |
if (p < k) { | |
(s, k) | |
} else { | |
var gprime = findTransition(t_(k), s).get | |
var pprime = gprime.end | |
var kprime = gprime.start | |
var sprime = gprime.to | |
while ((pprime - kprime) <= (p - k)) { | |
k = k + pprime - kprime + 1 | |
s = sprime | |
if (k <= p) { | |
gprime = findTransition(t_(k), s).get | |
pprime = gprime.end | |
kprime = gprime.start | |
sprime = gprime.to | |
} | |
} | |
(s, k) | |
} | |
} | |
for (j <- 1 to m) { | |
createTransition(perp, (-j, -j), root) | |
} | |
createSuffixLink(root, perp) | |
var s = root | |
var k = 1 | |
var i = 0 | |
var sk = (s, k) | |
while (t_(i + 1) != '#') { | |
i = i + 1 | |
sk = update(s, (k, i)) | |
s = sk._1 | |
k = sk._2 | |
sk = canonize(s, (k, i)) | |
s = sk._1 | |
k = sk._2 | |
} | |
root | |
} | |
def main(args: Array[String]): Unit = { | |
val root = ukkonensAlgorithm() | |
root.printDebugString("none", "none") | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment