Created
February 27, 2017 04:37
-
-
Save ryanbsherrill/232f7aa9bfdff8a165e9a8cefdbec3bd to your computer and use it in GitHub Desktop.
analyze a most frequent word problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Split a raw string into a sorted array of lower-cased tokens.
 *
 * The string is lower-cased and then cut wherever one or more of these
 * separator characters occur: space, comma, `!`, `.`, `"`, `;`, `:` or `-`.
 * Any other character (for example `?`, `&` or a digit) stays part of the
 * token it appears in. Empty tokens are dropped.
 *
 * @param {string} rawString - The text to tokenise.
 * @returns {string[]} The tokens, sorted with the default `Array.prototype.sort`
 *   ordering (UTF-16 code unit order, which is alphabetical for lower-case
 *   ASCII words).
 */
function getTokens(rawString) {
  var separators = /[ ,!.";:-]+/;
  var pieces = rawString.toLowerCase().split(separators);
  // A leading or trailing separator yields an empty string from `split`;
  // `.filter(Boolean)` removes those falsy items.
  return pieces.filter(Boolean).sort();
}
/**
 * Return the token that occurs most often in a piece of text.
 *
 * The text is tokenised with `getTokens`, so whole tokens are counted; words
 * are not broken up into individual characters.
 *
 * @param {string} text - The text to analyse.
 * @returns {string|undefined} The most frequent token, or `undefined` when the
 *   text contains no tokens. When several tokens share the highest count, the
 *   one that the frequency table enumerates first is returned.
 */
function mostFrequentWord(text) {
  var words = getTokens(text);

  // Tally of token -> number of occurrences. The table has no prototype so
  // that tokens such as "constructor" or "__proto__" are counted like any
  // other word instead of colliding with members inherited from
  // Object.prototype.
  var wordFrequencies = Object.create(null);

  // Stop at the last real index: reading words[words.length] would yield
  // `undefined` and add a phantom "undefined" entry to the tally.
  for (var i = 0; i < words.length; i++) {
    var token = words[i];
    if (token in wordFrequencies) {
      // Seen before: bump its count.
      wordFrequencies[token]++;
    } else {
      // First sighting: start its count at 1.
      wordFrequencies[token] = 1;
    }
  }

  // Start with the first entry of the tally as the provisional winner. For
  // an empty tally both values are `undefined` and the loop below is skipped.
  var currentMaxKey = Object.keys(wordFrequencies)[0];
  var currentMaxCount = wordFrequencies[currentMaxKey];

  for (var word in wordFrequencies) {
    // Strict `>` keeps the earlier entry when two counts are equal.
    if (wordFrequencies[word] > currentMaxCount) {
      currentMaxKey = word;
      currentMaxCount = wordFrequencies[word];
    }
  }

  return currentMaxKey;
}
// Manual smoke test: a sample string used to explore what mostFrequentWord does.
var myString = "hi hello ryan benjamin this that the other! * & % ^ # ^ &)#^(^# what are you doing? ryan this that what where who when this that what where who when i i i i knnowjidf;hkhf hfjghhgjlei74nw7s77&&&&E))_())#_";
// Log the result so it can be inspected in the browser console.
console.log(mostFrequentWord(myString));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment