Created
November 30, 2023 19:46
-
-
Save kynatro/8434fa85133562e1a4d3a8cf907bdcaf to your computer and use it in GitHub Desktop.
Word count and sort module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Words that `words()` drops from its result when `removeIgnoredWords` is
 * enabled.
 *
 * The empty string is listed on purpose: a token that consists only of
 * punctuation becomes '' after trimming and is discarded through this list.
 */
export const IGNORE_WORDS = [
  '',
  'a', 'an', 'and', 'as',
  'be', 'by',
  'for',
  'has', 'he', 'her', 'hers', 'him', 'his',
  'in', 'it',
  'of', 'or', 'our', 'ours',
  'the', 'their', 'theirs', 'this', 'to',
  'us',
  'was', 'we', 'were',
];
/**
 * Sort counted words by their count in descending order
 *
 * Words with the same count are ordered alphabetically (ascending, compared
 * case-insensitively). The input dictionary is not modified.
 *
 * NOTE: JavaScript always enumerates integer-like keys (e.g. "2023") first,
 * in ascending numeric order, regardless of insertion order. Such keys will
 * therefore appear ahead of the count-sorted words in the returned object.
 *
 * @param {Object} wordCounts Dictionary of words and counts
 * @returns {Object} Sorted dictionary of word counts
 */
export function sortByCount(wordCounts) {
  const rankedWords = Object.keys(wordCounts).sort((a, b) => {
    // Sort by count in descending order
    if (wordCounts[a] < wordCounts[b]) {
      return 1;
    }
    if (wordCounts[a] > wordCounts[b]) {
      return -1;
    }

    // Sort alphabetically in ascending order for like counts
    const lowerA = a.toLowerCase();
    const lowerB = b.toLowerCase();
    if (lowerA > lowerB) {
      return 1;
    }
    if (lowerA < lowerB) {
      return -1;
    }
    return 0;
  });

  // Collect into a prototype-less object first: assigning a "__proto__" key
  // on a regular object hits the inherited setter and silently drops the word.
  const sorted = Object.create(null);
  for (const word of rankedWords) {
    sorted[word] = wordCounts[word];
  }

  // Spread copies own properties (including "__proto__") as plain data
  // properties, so callers still receive an ordinary Object.
  return { ...sorted };
}
/**
 * Split text into words
 *
 * Splits the text on runs of whitespace, strips punctuation from both ends
 * of every token, and by default lowercases the text and removes any word
 * listed in IGNORE_WORDS. Options that are omitted fall back to their
 * defaults individually, so a partial options object is safe to pass.
 *
 * @param {String} text Corpus of text to split
 * @param {Object} options
 * @param {Boolean} options.caseSensitive (false) Respect case sensitivity;
 *   when false the text is lowercased before splitting
 * @param {Boolean} options.removeIgnoredWords (true) Remove words listed in
 *   IGNORE_WORDS from the returned Array; when false every token is kept
 * @returns {Array} Array of words
 */
export function words(text, options = {}) {
  // Default each option on its own so that passing only one of them does not
  // silently disable the other.
  const { caseSensitive = false, removeIgnoredWords = true } = options;

  const normalized = caseSensitive ? text : text.toLowerCase();

  const tokens = normalized
    // Any whitespace run (spaces, tabs, newlines) separates words
    .split(/\s+/)
    // Trim punctuation from BOTH ends; the `g` flag is required, otherwise
    // only the first matching side is removed.
    // NOTE: \w is ASCII-only, so non-ASCII letters at a word edge are trimmed too.
    .map(token => token.replace(/^[^\w]+|[^\w]+$/g, ''));

  if (!removeIgnoredWords) {
    return tokens;
  }

  return tokens.filter(token => !IGNORE_WORDS.includes(token));
}
/**
 * Count unique instances of words
 *
 * Keys of the returned dictionary appear in order of first occurrence
 * (subject to JavaScript's rule that integer-like keys are listed first).
 *
 * @param {Array} words Array of words to count uniquely
 * @returns {Object} Dictionary of word counts
 */
export function wordCounts(words) {
  // Tally on a prototype-less object so that words such as "constructor",
  // "toString" or "__proto__" are not mistaken for inherited Object members.
  const tally = Object.create(null);

  for (const word of words) {
    tally[word] = (tally[word] || 0) + 1;
  }

  // Copy into an ordinary Object for callers; spread defines each key as a
  // plain own data property.
  return { ...tally };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment