Created
February 5, 2012 20:07
-
-
Save gclaramunt/1747732 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io.Source | |
import java.io.File | |
/**
 * Predictive text (phone keypad / T9-style) suggester.
 *
 * Reads a text file, converts every word in it to the digit sequence that
 * would be typed on a phone keypad, and prints the words whose digit sequence
 * either equals the requested sequence (exact matches) or merely starts with
 * it (prefix matches), most frequent first.
 */
object Suggest {

  /** Word -> number of occurrences seen so far. */
  type FreqMap = Map[String, Int]

  /** Pair of (exact matches, prefix matches) frequency maps. */
  type FreqMaps = (FreqMap, FreqMap)

  /** Maximum number of prefix matches that are printed. */
  private val MaxPrefixSuggestions = 5

  /**
   * Scans the file and prints all exact matches followed by the most frequent
   * prefix matches for the given digit sequence.
   *
   * @param f   text file to scan
   * @param seq keypad digit sequence (e.g. "228") to look up
   */
  def printSuggestions(f: File, seq: String): Unit = {
    log("Parsing file " + f.getAbsolutePath)
    val text = Source.fromFile(f)
    val (exact, prefix) =
      try {
        val empty: FreqMaps = (Map.empty[String, Int], Map.empty[String, Int])
        text.getLines().foldLeft(empty)((maps, line) => scanLine(maps, line, seq))
      } finally {
        // Release the file handle even if reading or converting a line fails.
        text.close()
      }
    log("Exact matches for " + seq + ": ")
    sortByFreq(exact).foreach(wf => println(wf._1))
    log("Prefix matches for " + seq + ": ")
    sortByFreq(prefix).take(MaxPrefixSuggestions).foreach(wf => println(wf._1))
  }

  /**
   * Orders the entries by descending frequency. Words with equal frequency are
   * ordered alphabetically so that the output does not depend on the map's
   * iteration order.
   */
  def sortByFreq(m: FreqMap): List[(String, Int)] =
    m.toList.sortBy { case (word, count) => (-count, word) }

  /**
   * Splits a line into lowercase words. Every character outside A-Z / a-z is
   * treated as a separator; the empty tokens produced by adjacent separators
   * are dropped.
   */
  def lineToNumeric(line: String): Array[String] =
    line
      .split("[^A-Za-z]")
      .filter(_.nonEmpty)
      // Locale-independent lowercasing: the default-locale variant maps "I" to a
      // dotless i under a Turkish locale, which convert cannot translate.
      .map(_.toLowerCase(java.util.Locale.ROOT))

  /** Folds every word of the line into the (exact, prefix) frequency maps. */
  def scanLine(mp: FreqMaps, line: String, seq: String): FreqMaps =
    lineToNumeric(line).foldLeft(mp)((maps, word) => predictWord(maps, word, seq))

  /**
   * Registers the word in the exact map when its digit sequence equals `seq`,
   * in the prefix map when its digit sequence is longer but starts with `seq`,
   * and leaves both maps untouched otherwise.
   */
  def predictWord(mp: FreqMaps, word: String, seq: String): FreqMaps = {
    val (exact, prefix) = mp
    val numword = toNumeric(word)
    if (!numword.startsWith(seq)) mp
    else if (numword.length == seq.length) (incrementCount(word, exact), prefix)
    else (exact, incrementCount(word, prefix))
  }

  /**
   * Returns a copy of the map in which the count of `word` is one higher.
   * A word that was not present yet ends up with a count of 1.
   */
  def incrementCount(word: String, map: FreqMap): FreqMap =
    map + (word -> (map.getOrElse(word, 0) + 1))

  /**
   * Utility method to convert a word (e.g. "cat") to its numeric
   * representation (e.g. 228). It must be lowercase. A runtime
   * exception is thrown in case non alphabet characters are provided.
   */
  def toNumeric(word: String): String = word.map(c => convert(c))

  /** Letters printed on each keypad key, mapped to that key's digit. */
  val KEYPAD_MAP: Map[String, Char] = Map(
    ("abc" -> '2'),
    ("def" -> '3'),
    ("ghi" -> '4'),
    ("jkl" -> '5'),
    ("mno" -> '6'),
    ("pqrs" -> '7'),
    ("tuv" -> '8'),
    ("wxyz" -> '9')
  )

  /** Per-letter view of KEYPAD_MAP so a lookup does not scan every key group. */
  private val digitByLetter: Map[Char, Char] =
    for ((letters, digit) <- KEYPAD_MAP; letter <- letters) yield letter -> digit

  /**
   * Converts a single lowercase letter to its keypad digit.
   * Throws a RuntimeException for any character that is not on the keypad.
   */
  def convert(c: Char): Char =
    digitByLetter.getOrElse(c, throw new RuntimeException("Can't convert char: " + c))

  /**
   * Command line entry point: expects a file name and a digit sequence.
   * Exits with status 1 on wrong usage and 2 when the file is not usable.
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      log("Usage: java Suggest filename seq")
      System.exit(1)
    }
    val f: File = new File(args(0))
    if (!f.exists || !f.isFile) {
      log(args(0) + " is not a valid file")
      System.exit(2)
    }
    val seq: String = args(1)
    printSuggestions(f, seq)
  }

  /** Writes a message to standard output. */
  private def log(s: String): Unit = {
    System.out.println(s)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment