Skip to content

Instantly share code, notes, and snippets.

@pathikrit
Last active March 5, 2016 06:25
Show Gist options
  • Save pathikrit/233d5dcd8b1733007491 to your computer and use it in GitHub Desktop.
Save pathikrit/233d5dcd8b1733007491 to your computer and use it in GitHub Desktop.
Auto suggester
class AutoSuggest(corpus: String, alphabet: Seq[Char] = 'a' to 'z', depth: Int = 2) {
val words = s"[${alphabet.head}-${alphabet.last}]+".r
.findAllIn(corpus.toLowerCase).toSeq
.groupBy(_.toSeq).mapValues(_.size)
.par withDefaultValue 0
def editDistance(a: Seq[Char], b: Seq[Char]) = {
lazy val d: Stream[Stream[Int]] = Stream.tabulate(a.length + 1, b.length + 1) {
case (i, j) if (i - j).abs > depth => Int.MaxValue
case (i, 0) => i
case (0, j) => j
case (i, j) if a(i-1) == b(j-1) => d(i-1)(j-1)
case (i, j) => 1 + (d(i)(j-1) min d(i-1)(j) min d(i-1)(j-1))
}
d(a.length)(b.length)
}
def apply(word: String) = words maxBy {case (i, frequency) => -editDistance(i, word) -> frequency}
}
// Demo:
object AutoSuggest extends App {
val suggest = new AutoSuggest(io.Source.fromFile("big.txt").mkString) // curl http://norvig.com/big.txt > big.txt
Seq("speling", "korrecter", "corrected", "xyz") foreach {w => println(s"$w: ${suggest(w)}")}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment