Last active
December 21, 2017 15:10
-
-
Save niketpathak/d09d8b8046b4d2faf589bddc49f7f095 to your computer and use it in GitHub Desktop.
Remove Stop words using JS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Strip stop words from a given string.
 *
 * Matching is case-insensitive and anchored on word boundaries, so "The" is
 * removed but "there" is not affected by the stop word "the". Each matched
 * word is replaced with an empty string; the whitespace around it is left
 * untouched (e.g. "the cat" becomes " cat").
 *
 * @param {string} inputString The input string. Falsy input yields ''.
 * @param {string[]} [stopWords] Optional array of stop words. When omitted or
 *     not an array, a built-in English stop word list is used. The array is
 *     never modified.
 * @returns {string} The input with every stop word occurrence removed.
 */
function removeStopWords (inputString, stopWords) {
    // Nothing to process for undefined / null / empty input.
    if (!inputString) return '';

    var words = stopWords;
    // Fall back to the default English list when no usable array was supplied.
    if (!Array.isArray(words)) {
        words = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ];
    }

    // Build the alternation from a filtered copy so the caller's array is
    // never mutated. Non-string and empty entries are dropped because they
    // cannot form a meaningful alternative.
    var alternatives = words
        .filter(function (word) {
            return typeof word === 'string' && word.length > 0;
        })
        // Longest first: regex alternation takes the first alternative that
        // matches, so "he'd" must be tried before "he" or only "he" is removed.
        .sort(function (a, b) {
            return b.length - a.length;
        })
        // Escape regex metacharacters so stop words are matched literally.
        .map(function (word) {
            return word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        });

    var text = String(inputString);
    // With no stop words there is nothing to remove.
    if (alternatives.length === 0) return text;

    return text.replace(new RegExp('\\b(' + alternatives.join('|') + ')\\b', 'gi'), '');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment