Last active
April 17, 2016 04:23
-
-
Save sonyseng/8657ba1f811df6f9b2e26b00f89fe9cd to your computer and use it in GitHub Desktop.
Will tokenize quoted and unquoted strings for searching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample input: a double-quoted phrase, bare words, a single-quoted phrase
// containing nested double quotes, and a bare word ending in a stray quote.
var str = '\"hello world\" arg1 arg2 \'multi arg that has nested strings \"haha hehe\"\' arg3 anotherArg _arg4_\'';
/**
 * Splits a search string into tokens.
 *
 * Rules:
 *  - Tokens are separated by the space character (' '); runs of spaces are skipped.
 *  - A token that starts with " or ' extends to the next occurrence of the same
 *    quote character; the surrounding quotes are dropped and everything in
 *    between (spaces and the other quote character included) is kept verbatim.
 *  - An unterminated quote extends to the end of the input.
 *  - An empty quoted section ("" or '') yields an empty-string token.
 *  - A quote character appearing inside an unquoted token is kept literally,
 *    because an unquoted token only ends at a space.
 *
 * @param {string} str - The raw search string.
 * @returns {string[]} The tokens in the order they appear in `str`.
 */
function tokenizer (str) {
  var tokens = [];
  var strLen = str.length;
  var i = 0;
  var ch, end;

  while (i < strLen) {
    ch = str[i];

    if (ch === ' ') {
      // Separator: nothing to emit.
      i++;
    } else if (ch === '"' || ch === '\'') {
      // Quoted token: find the matching closing quote, or run to the end of
      // the input when the quote is never closed.
      end = str.indexOf(ch, i + 1);
      if (end === -1) {
        end = strLen;
      }
      tokens.push(str.slice(i + 1, end));
      // Resume right after the closing quote so that a directly adjacent
      // quoted section is recognised as quoted as well.
      i = end + 1;
    } else {
      // Unquoted token: runs up to the next space or the end of the input.
      end = str.indexOf(' ', i);
      if (end === -1) {
        end = strLen;
      }
      tokens.push(str.slice(i, end));
      i = end;
    }
  }

  return tokens;
}
// Tokenize the sample input; the resulting array is listed below.
tokenizer(str);
/* Output:
array ==>
0:"hello world"
1:"arg1"
2:"arg2"
3:"multi arg that has nested strings "haha hehe""
4:"arg3"
5:"anotherArg"
6:"_arg4_'"
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment