Last active
January 6, 2017 22:00
-
-
Save MichaelChirico/dfca231640616b5e617a5b39265b4d09 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)

#word list, lower-cased so that lookups ignore case; lower-casing can
#  make entries that differed only by case identical, so duplicates are
#  dropped to keep a word from being reported more than once
dict.orig = unique(tolower(readLines("/usr/share/dict/american-english")))

#one column per character position; words shorter than the longest are
#  padded with "" so every row has the same number of columns
dictDT = setDT(tstrsplit(dict.orig, split = "", fill = ""))

#lookup table for conversion: keypad digit for each letter
#  (2 = abc, 3 = def, 4 = ghi, 5 = jkl, 6 = mno, 7 = pqrs, 8 = tuv,
#  9 = wxyz); num is stored as character because it overwrites
#  character columns of dictDT
lookup = data.table(
  num = as.character(rep(2:9, times = c(3L, 3L, 3L, 3L, 3L, 4L, 3L, 4L))),
  let = letters
)

#the following are found in the dictionary and would need
#  to be handled separately (accents should just be
#  appended to matches for unaccented version):
#  c("", "'", "á", "â", "å", "ä",
#    "ç", "é", "è", "ê", "í", "ñ",
#    "ó", "ô", "ö", "û", "ü")
#they have no row in lookup, so the loop below leaves them as they are
for (col in names(dictDT)) {
  #update join: wherever this column holds a letter present in
  #  lookup$let, overwrite it by reference with that letter's digit
  dictDT[lookup, (col) := i.num, on = setNames("let", col)]
}

#back to character vector: paste the columns of each row together
dict.num = do.call(paste0, dictDT)

#order by digit code so that words sharing a code sit next to each
#  other; the same permutation is applied to both vectors so that
#  dict.orig[i] is still the word encoded by dict.num[i]
idx = order(dict.num)
dict.num = dict.num[idx]
dict.orig = dict.orig[idx]
#possibilities: dictionary words spelled by a keypad digit string
#  input:   digit code(s) as character, e.g. "43556"
#  returns: the elements of dict.orig whose encoding in the parallel
#           vector dict.num is among input, in dictionary order;
#           character(0) when nothing matches
#%in% is used instead of == so that an NA input gives character(0)
#  rather than a dictionary-length vector of NA, and so that a
#  multi-element input is matched as a set instead of being recycled
possibilities = function(input) dict.orig[dict.num %in% input]
#lapply for multiple inputs: returns a list with one character vector
#  of candidate words per digit code, in the order the codes are given
lapply(
  c("43556", "469", "47", "48", "46464", "3637",
    "8447", "22882559", "9675", "67", "9428"),
  possibilities
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment