Last active
August 8, 2020 05:53
-
-
Save zgover/8130eb3b7dd6f915316435e8f0cf657e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @license
 * Copyright Gover Construction LLC. All Rights Reserved.
 *
 * Use of this source code is governed by an MIT-style license that can be
 * found in the LICENSE file at https://gist.github.com/zgover/678d51ffc2477d2e714052d7154f784b
 */
/**
 * Removes stop words (e.g. the, we, you, did, a, etc.) from
 * the provided string of text.
 *
 * The input is coerced with `String()`, split on `separator`, and every
 * piece whose lower-cased form appears in `stopWords` is dropped. The
 * remaining pieces keep their original casing and order.
 *
 * @param {String} str the text to remove stop words from
 * @param {Object} opt the configuration options
 *        {String} separator the character between two potential stop words
 *                 (defaults to a single space)
 * @returns {String[]} the pieces of `str` that are not stop words
 **/
export default function removeStopWords(str, opt = {}) {
  // Fall back to a space when `opt` is null or `separator` is omitted/undefined,
  // so an explicit `separator: undefined` cannot disable splitting.
  const { separator = ' ' } = opt || {}
  return String(str)
    .split(separator)
    // Compare case-insensitively; the stop word list is all lower case.
    .filter((word) => !stopWords.includes(word.toLowerCase()))
}
/**
 * Lower-case English stop words, including common contractions, that
 * `removeStopWords` filters out. Entries are compared against the
 * lower-cased form of each word, so every entry here must be lower case.
 */
export const stopWords = [
  'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'as', 'at', 'be',
  'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'could', 'did', 'do', 'does',
  'doing', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'has', 'have', 'having', 'he',
  'he\'d', 'he\'ll', 'he\'s', 'her', 'here', 'here\'s', 'hers', 'herself', 'him', 'himself', 'his', 'how',
  'how\'s', 'i', 'i\'d', 'i\'ll', 'i\'m', 'i\'ve', 'if', 'in', 'into', 'is', 'it', 'it\'s', 'its', 'itself', 'let\'s',
  'me', 'more', 'most', 'my', 'myself', 'nor', 'of', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours',
  'ourselves', 'out', 'over', 'own', 'same', 'she', 'she\'d', 'she\'ll', 'she\'s', 'should', 'so',
  'some', 'such', 'than', 'that', 'that\'s', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there',
  'there\'s', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re', 'they\'ve', 'this', 'those', 'through', 'to',
  'too', 'under', 'until', 'up', 'very', 'was', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'were', 'what',
  'what\'s', 'when', 'when\'s', 'where', 'where\'s', 'which', 'while', 'who', 'who\'s', 'whom', 'why', 'why\'s',
  'with', 'would', 'you', 'you\'d', 'you\'ll', 'you\'re', 'you\'ve', 'your', 'yours', 'yourself', 'yourselves',
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment