Skip to content

Instantly share code, notes, and snippets.

@richard512
Last active August 29, 2015 14:20
Show Gist options
  • Save richard512/96707f9218976c5bfd34 to your computer and use it in GitHub Desktop.
Save richard512/96707f9218976c5bfd34 to your computer and use it in GitHub Desktop.
JavaScript Word Frequency
/**
 * Counts how often each word occurs in a piece of text.
 *
 * The text is lowercased, commas / semicolons / periods are removed, and the
 * result is split into words on runs of whitespace and forward slashes.
 * Common stop words (see `ignore` below) and empty tokens are not counted.
 *
 * @param {string} sWords - The text to analyse.
 * @returns {(Array<{word: string, freq: number}>|boolean)} One entry per
 *   distinct word, in order of first appearance, or `false` when the input
 *   is not a non-empty string.
 */
function wordFreq(sWords) {
  // make sure input looks right
  if (typeof sWords !== 'string' || !sWords) return false;

  // wordFreq() will ignore these words:
  var ignore = [
    'and', 'the', 'to', 'a', 'of', 'for', 'as', 'i', 'with', 'it',
    'is', 'on', 'that', 'this', 'can', 'in', 'be', 'has', 'if'
  ];

  // No explicit trim is needed: leading/trailing separators only produce
  // empty tokens, and those are skipped in the loop below.
  var tokens = sWords
    .toLowerCase()
    .replace(/[,;.]/g, '')
    .split(/[\s\/]+/);

  // word -> position in wordfreqs. A prototype-less object is used so that
  // words such as "constructor" or "__proto__" cannot collide with
  // inherited properties.
  var indexByWord = Object.create(null);
  var wordfreqs = [];

  for (var i = 0; i < tokens.length; i += 1) {
    var token = tokens[i];

    // Stripping punctuation can leave empty tokens (e.g. "..." or a leading
    // "/"); they are not words, so skip them along with the stop words.
    if (token === '' || ignore.indexOf(token) >= 0) continue;

    var slot = indexByWord[token];
    if (slot === undefined) {
      // new word:
      indexByWord[token] = wordfreqs.length;
      wordfreqs.push({
        word: token,
        freq: 1
      });
    } else {
      // duplicate word:
      wordfreqs[slot].freq += 1;
    }
  }

  return wordfreqs;
}
/**
 * Finds the position of the first element whose property `attr` is strictly
 * equal to `value`.
 *
 * @param {Array<Object>} array - Elements to search, in order.
 * @param {string} attr - Name of the property to compare on each element.
 * @param {*} value - Value to look for (compared with `===`).
 * @returns {(number|undefined)} Index of the first match, or `undefined`
 *   when no element matches.
 */
function findWithAttr(array, attr, value) {
  var position = 0;
  while (position < array.length) {
    var candidate = array[position];
    if (candidate[attr] === value) {
      return position;
    }
    position += 1;
  }
  // Nothing matched.
  return undefined;
}
/**
 * Sorts an array of objects in place by the value of one property.
 *
 * @param {Array<Object>} array - Objects to sort; the array itself is
 *   reordered and also returned.
 * @param {string} key - Name of the property to compare.
 * @param {string} [order] - 'numasc' for ascending; 'numdesc' or any other
 *   value (including omitted) for descending.
 * @returns {Array<Object>} The same array instance, now sorted.
 */
function sortByKey(array, key, order) {
  // +1 keeps the natural (ascending) comparison, -1 flips it. Descending is
  // the default for every order value other than 'numasc'.
  var direction = (order === 'numasc') ? 1 : -1;

  return array.sort(function (a, b) {
    var x = a[key];
    var y = b[key];
    if (x < y) return -direction;
    if (x > y) return direction;
    return 0;
  });
}
/**
 * Returns the ten most frequent words in the current page's visible text.
 *
 * Reads `document.body.innerText`, counts words with wordFreq(), and ranks
 * them by descending frequency with sortByKey().
 *
 * @returns {Array<Array>} Up to ten `[freq, word]` pairs, most frequent
 *   first; an empty array when the page yields no countable text.
 */
function getTopTenWordsOnPage() {
  var doctext = document.body.innerText;

  // wordFreq() returns false for empty or non-string text; treat that as
  // "no words" so the sort below is never handed a non-array.
  var counted = wordFreq(doctext) || [];

  var ranked = sortByKey(counted, 'freq', 'numdesc').slice(0, 10);

  return ranked.map(function (entry) {
    return [entry.freq, entry.word];
  });
}
// Run immediately: compute the page's most frequent words and show them in
// an alert, one "freq,word" pair per line (each inner array is stringified
// by join()).
var topten = getTopTenWordsOnPage();
alert(topten.join('\n'));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment