Skip to content

Instantly share code, notes, and snippets.

@zo0m
Last active December 20, 2016 10:09
Show Gist options
  • Save zo0m/73bf5173d0117ce2c21b16df9fa98d5a to your computer and use it in GitHub Desktop.
Save zo0m/73bf5173d0117ce2c21b16df9fa98d5a to your computer and use it in GitHub Desktop.
String ===> Words array
/**
 * Splits a phrase into tokens, where each token is a maximal run of
 * consecutive characters of the same class: whitespace, punctuation, digits,
 * or word characters.
 *
 * Classes are checked in that order, so `_` and digits (which `\w` would also
 * match) are reported as punctuation and digits respectively. Any character
 * that is not whitespace, punctuation, or a digit is treated as a word
 * character. This keeps letters of every script (Cyrillic, accented Latin,
 * CJK, ...) as well as symbols and emoji in the output instead of silently
 * dropping them, so joining the returned tokens reproduces the input string.
 *
 * @example
 * getNormalizedWordsArray("Hello, my name is Igor. What are you doing?:)")
 * // => ["Hello", ",", " ", "my", " ", "name", " ", "is", " ", "Igor", ".",
 * //     " ", "What", " ", "are", " ", "you", " ", "doing", "?:)"]
 *
 * @param {string} phrase - Text to split into tokens.
 * @returns {string[]} Tokens in input order; an empty array for an empty string.
 */
function getNormalizedWordsArray(phrase) {
  // The patterns deliberately have no `g` flag: `.test()` on a global regex
  // advances `lastIndex`, which would make repeated single-character tests
  // stateful. Without it the same RegExp objects can be reused safely.
  const spaceRegEx = /\s/;
  // ASCII punctuation plus the \u2000-\u206F and \u2E00-\u2E7F ranges.
  const punctuationRegEx = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/;
  const digitRegEx = /\d/;

  // Order matters: whitespace wins over punctuation for the code points that
  // fall in both (e.g. \u2003), matching the original precedence.
  const charClasses = [
    ['space', spaceRegEx],
    ['punct', punctuationRegEx],
    ['digit', digitRegEx],
  ];

  const classify = (char) => {
    for (const [className, regEx] of charClasses) {
      if (regEx.test(char)) {
        return className;
      }
    }
    // Fallback: everything else is part of a word, so no input is lost.
    return 'word';
  };

  const tokens = [];
  let previousClass = null;

  // Array.from iterates by code point, so astral characters stay intact.
  for (const char of Array.from(phrase)) {
    const currentClass = classify(char);
    if (currentClass !== previousClass) {
      // Class changed: start a new token.
      tokens.push('');
      previousClass = currentClass;
    }
    tokens[tokens.length - 1] += char;
  }

  return tokens;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment