Last active
March 5, 2016 06:25
-
-
Save pathikrit/233d5dcd8b1733007491 to your computer and use it in GitHub Desktop.
Auto suggester
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class AutoSuggest(corpus: String, alphabet: Seq[Char] = 'a' to 'z', depth: Int = 2) { | |
val words = s"[${alphabet.head}-${alphabet.last}]+".r | |
.findAllIn(corpus.toLowerCase).toSeq | |
.groupBy(_.toSeq).mapValues(_.size) | |
.par withDefaultValue 0 | |
def editDistance(a: Seq[Char], b: Seq[Char]) = { | |
lazy val d: Stream[Stream[Int]] = Stream.tabulate(a.length + 1, b.length + 1) { | |
case (i, j) if (i - j).abs > depth => Int.MaxValue | |
case (i, 0) => i | |
case (0, j) => j | |
case (i, j) if a(i-1) == b(j-1) => d(i-1)(j-1) | |
case (i, j) => 1 + (d(i)(j-1) min d(i-1)(j) min d(i-1)(j-1)) | |
} | |
d(a.length)(b.length) | |
} | |
def apply(word: String) = words maxBy {case (i, frequency) => -editDistance(i, word) -> frequency} | |
} | |
// Demo: | |
object AutoSuggest extends App { | |
val suggest = new AutoSuggest(io.Source.fromFile("big.txt").mkString) // curl http://norvig.com/big.txt > big.txt | |
Seq("speling", "korrecter", "corrected", "xyz") foreach {w => println(s"$w: ${suggest(w)}")} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment