Last active
December 20, 2016 10:09
-
-
Save zo0m/73bf5173d0117ce2c21b16df9fa98d5a to your computer and use it in GitHub Desktop.
String ===> Words array
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits a phrase into an array of tokens, where each token is a maximal run
 * of consecutive characters belonging to the same character class.
 *
 * Classes are checked in this order; the first match wins:
 *   1. space - anything matched by `\s`
 *   2. punct - ASCII punctuation (including `_`) plus U+2000-U+206F and
 *              U+2E00-U+2E7F
 *   3. digit - `\d`
 *   4. word  - `\w` (only letters reach this rule; digits and `_` are
 *              claimed by the earlier rules)
 *   5. other - every character that matched none of the above (non-ASCII
 *              letters, currency symbols, emoji, ...). These characters are
 *              kept and grouped into their own tokens instead of being
 *              dropped, so joining the result reproduces the input.
 *
 * The phrase is iterated by code point (via `Array.from`), so surrogate pairs
 * are never split across tokens.
 *
 * @example
 * getNormalizedWordsArray("Hello, my name is Igor. What are you doing?:)")
 * // => ["Hello", ",", " ", "my", " ", "name", " ", "is", " ", "Igor", ".", " ",
 * //     "What", " ", "are", " ", "you", " ", "doing", "?:)"]
 *
 * @param {string} phrase - Text to tokenize. `null`/`undefined` yield `[]`.
 * @returns {string[]} Tokens in input order.
 */
function getNormalizedWordsArray(phrase) {
  if (phrase == null) {
    return [];
  }

  // The patterns deliberately have no `g` flag: a global regex keeps
  // `lastIndex` state between `.test()` calls, which would otherwise force a
  // fresh RegExp to be built for every character.
  const characterClasses = [
    { name: 'space', pattern: /\s/ },
    { name: 'punct', pattern: /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/ },
    { name: 'digit', pattern: /\d/ },
    { name: 'word', pattern: /\w/ },
  ];

  // Returns the name of the first class whose pattern matches, or 'other'.
  const classify = (char) => {
    for (const { name, pattern } of characterClasses) {
      if (pattern.test(char)) {
        return name;
      }
    }
    return 'other';
  };

  const tokens = [];
  let previousClass = null;

  for (const char of Array.from(phrase)) {
    const currentClass = classify(char);
    if (currentClass !== previousClass) {
      // The class changed (or this is the first character): start a new token.
      tokens.push('');
      previousClass = currentClass;
    }
    tokens[tokens.length - 1] += char;
  }

  return tokens;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment