Last active
December 31, 2015 18:59
-
-
Save pkukielka/8030702 to your computer and use it in GitHub Desktop.
Make implementation parallel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SpellChecker { | |
val alphabet = ('a' to 'z') ++ " " | |
def trim(word: String) = word.toLowerCase.filter(alphabet.contains(_)) | |
val file = io.Source.fromFile("big.txt").mkString | |
val wordsCount = file.split(' ').map(trim).groupBy(identity).mapValues(_.length).par | |
def distortions(w: String) = { | |
def disort(t: Int, d: Int, insert: String) = w.take(t) + insert + w.drop(d) | |
val transposes = for { i <- (0 to w.length - 2) } yield disort(i, i + 2, "" + w(i + 1) + w(i)) | |
val deletions = for { i <- (0 to w.length - 1) } yield disort(i, i + 1, "") | |
val replaces = for { i <- (0 to w.length - 1); l <- alphabet } yield disort(i , i + 1, l.toString) | |
val insertions = for { i <- (0 to w.length ); l <- alphabet } yield disort(i , i , l.toString) | |
(deletions ++ replaces ++ insertions ++ transposes).par | |
} | |
def matchingDistorsions(distortions: scala.collection.parallel.immutable.ParSeq[String]) = { | |
val matching = distortions.filter(wordsCount.contains(_)) | |
if (matching.isEmpty) None else Some(matching.maxBy(wordsCount(_))) | |
} | |
def correct(word: String): Option[String] = { | |
(if (wordsCount.contains(trim(word))) Some(word) else None) orElse | |
matchingDistorsions(distortions(trim(word))) orElse | |
matchingDistorsions(distortions(trim(word)).flatMap(distortions(_))) orElse | |
None | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Performance on my i5 machine for 8 letter word:
~20 corrections / sec in case of double distortion
~3000 corrections / sec in case of single distortion