Created
April 4, 2012 17:11
-
-
Save siddMahen/2303896 to your computer and use it in GitHub Desktop.
Finds Letter Frequencies In Text And Outputs Likely Letters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Frequency-analysis helpers (third-party "freq" module) and Node's fs module.
var freq = require("freq");
var fs = require("fs");

// Reference letter frequencies for English, in percent (the values sum to
// 100). Entry order is kept as-is because ties ("y"/"p") may be ranked by
// insertion order.
var eng = {
    "e": 13.11, "t": 10.47, "a": 8.15, "o": 8.00, "n": 7.10,
    "r": 6.83, "i": 6.35, "s": 6.10, "h": 5.26, "d": 3.79,
    "l": 3.39, "f": 2.92, "c": 2.76, "m": 2.54, "u": 2.46,
    "g": 1.99, "y": 1.98, "p": 1.98, "w": 1.54, "b": 1.44,
    "v": 0.92, "k": 0.42, "x": 0.17, "j": 0.13, "q": 0.12,
    "z": 0.08
};

// Reference weights for the most common English bigrams.
// NOTE(review): the unit of these counts is not stated here - only their
// relative order is used by the ranking below.
var bi = {
    "th": 168, "he": 132, "an": 92, "re": 91, "er": 88,
    "in": 86, "on": 71, "at": 68, "nd": 61, "st": 53,
    "es": 52, "en": 51, "of": 49, "te": 46, "ed": 46
};
// The ciphertext path is the first command-line argument; fail with a usage
// message instead of an opaque error from readFileSync when it is missing.
if(!process.argv[2]){
    console.error("usage: node <script> <ciphertext-file>");
    process.exit(1);
}

// Read the ciphertext and reduce it to the letters a-z. The text is folded to
// lower case first so that upper-case letters are counted rather than
// silently dropped by the [^a-z] filter.
//
// All five names are declared in ONE var statement: previously the statement
// ended after `file`, which turned the remaining four into implicit globals.
//
// bi_s / c_s   - reference bigram / letter tables ranked by freq.sortObj
// mbi_s / mc_s - bigram / letter statistics measured from the ciphertext
// NOTE(review): the rest of the script indexes all four like arrays and reads
// a `.node` property from each entry - confirm against the "freq" module.
var file = fs.readFileSync(process.argv[2], "utf8")
        .toLowerCase()
        .replace(/[^a-z]/g, ""),
    bi_s = freq.sortObj(bi),
    c_s = freq.sortObj(eng),
    mbi_s = freq.bigrams(file),
    mc_s = freq.charFreq(file);
console.log(mc_s);

// Pair two ranked lists position by position: the entry at rank k of
// `observed` is mapped to the entry at rank k of `reference` (both via their
// `.node` property). Ranks where either list has no entry are skipped.
function pairByRank(reference, observed){
    var pairing = {};
    var total = reference.length;
    for(var rank = 0; rank < total; rank++){
        if(!reference[rank] || !observed[rank])
            continue;
        pairing[observed[rank].node] = reference[rank].node;
    }
    return pairing;
}

// ciphertext letter -> English letter of the same frequency rank
var map = pairByRank(c_s, mc_s);

// ciphertext bigram -> English bigram of the same frequency rank
var bi_map = pairByRank(bi_s, mbi_s);
// refine the letter to letter mapping by cross referencing | |
// it with the bigram mapping | |
/**
 * Translate a string through the global letter mapping `map`.
 *
 * Every character is looked up once, against the ORIGINAL text. Running one
 * global replace per key over the whole string would re-translate characters
 * that an earlier replace had just produced: with {a: "b", b: "a"} the text
 * "ab" would collapse to "aa" instead of becoming "ba".
 *
 * Assumes the keys of `map` are single characters (it is a letter-to-letter
 * mapping); characters that are not own keys of `map` are passed through.
 *
 * @param {string} str - text to translate
 * @returns {string} the translated text
 */
var rpl_str = function(str){
    return str.split("").map(function(ch){
        // own-property check so inherited names are never used as a mapping
        return Object.prototype.hasOwnProperty.call(map, ch) ? map[ch] : ch;
    }).join("");
};
console.log(map);
console.log(bi_map);

// Cross-check: translate each observed bigram letter by letter and compare
// the result with the bigram itself. The mismatch branch is deliberately
// empty - its debug output is disabled - so this loop currently reports
// nothing.
var keys = Object.keys(bi_map);
keys.forEach(function(bigram){
    var translated = rpl_str(bigram[0]) + rpl_str(bigram[1]);
    if(bigram !== translated){
        //console.log(bigram);
        //console.log(translated);
    }
});
// replace the bigrams/letter in the ciphertext with the | |
// proper ones based on the mapping | |
/* | |
var keys = Object.keys(bi_map); | |
for(var i = 0, len = keys.length; i < len; i++){ | |
file = file.replace(new RegExp(keys[i], "g"), bi_map[keys[i]]); | |
} | |
console.log(file); | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment