Created
May 25, 2017 15:50
-
-
Save Schmerb/31e4c72970cddb0d230c3bc5ef2115e9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits a string into lower-cased word tokens, sorted in the default
 * (UTF-16 code unit) string order.
 *
 * Any run of whitespace (spaces, tabs, newlines, ...) and/or the punctuation
 * characters , ! . ? " ; : ( ) [ ] { } - counts as a single delimiter.
 * Apostrophes and underscores are NOT delimiters, so "don't" stays one token.
 *
 * @param {string} rawString - Text to tokenize. `null`/`undefined` yield `[]`.
 * @returns {string[]} Sorted array of non-empty, lower-cased tokens
 *     (duplicates are kept).
 */
function getTokens(rawString) {
  if (rawString == null) {
    return [];
  }
  // NB: `.filter(Boolean)` drops the empty strings `split` produces when the
  // text starts or ends with a delimiter (or consists only of delimiters).
  return rawString
    .toLowerCase()
    .split(/[\s,!.?";:()\[\]{}-]+/)
    .filter(Boolean)
    .sort();
}
/**
 * Returns the word that occurs most often in `text`.
 *
 * The text is tokenized with `getTokens`, each token is tallied, and the
 * token with the highest tally is returned. When several tokens share the
 * highest tally, the first of them in the tally object's key order wins.
 *
 * @param {string} text - Text to analyse.
 * @returns {string|undefined} The most frequent token, or `undefined` when
 *     the text contains no tokens.
 */
function mostFrequentWord(text) {
  var words = getTokens(text);

  // A prototype-less object is used for the tally so that words such as
  // "constructor" or "__proto__" are not mistaken for already-counted keys
  // inherited from Object.prototype.
  var wordFrequencies = Object.create(null);

  // `<` (not `<=`): indexing one past the end would tally a phantom
  // "undefined" word.
  for (var i = 0; i < words.length; i++) {
    var token = words[i];
    wordFrequencies[token] = (wordFrequencies[token] || 0) + 1;
  }

  // Every real tally is >= 1, so starting from 0 lets the first key become
  // the initial maximum and leaves the result `undefined` for empty input.
  var currentMaxKey;
  var currentMaxCount = 0;
  for (var word in wordFrequencies) {
    // Strict `>` keeps the earliest key on ties.
    if (wordFrequencies[word] > currentMaxCount) {
      currentMaxKey = word;
      currentMaxCount = wordFrequencies[word];
    }
  }

  return currentMaxKey;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment