Skip to content

Instantly share code, notes, and snippets.

@mbildner
Last active August 29, 2015 14:04
Show Gist options
  • Save mbildner/c724fdc04edd635b6db7 to your computer and use it in GitHub Desktop.
Save mbildner/c724fdc04edd635b6db7 to your computer and use it in GitHub Desktop.
clean tokenizer
// Sample expression, as source text, used to exercise the tokenizer below.
// (Previously declared twice with the same value; one declaration suffices.)
var str = "users[allUsers['123124'].username].first";

// Sample data: an id-like key mapped to a record that holds a username.
var allUsers = {
  '123124': {username: 'mbildner'}
};

// Sample data: records keyed by the username stored in `allUsers`.
var users = {
  'mbildner': {
    first: 'moshe',
    age: 25
  }
};

// The same expression as `str`, evaluated directly by the JavaScript engine.
var name = users[allUsers['123124'].username].first;
/**
 * Stamps every character record with its position in the array.
 *
 * The records are mutated in place (an `indx` property is added); the
 * returned array is a new array holding those same record objects.
 *
 * @param {Array<Object>} charArr - character records.
 * @returns {Array<Object>} new array of the same records, each with `indx` set.
 */
function markIndex (charArr) {
  function tagWithPosition (entry, position) {
    entry.indx = position;
    return entry;
  }

  return charArr.map(tagWithPosition);
}
/**
 * Annotates every character record with its square-bracket nesting depth.
 *
 * The running depth is adjusted BEFORE it is stored, so an opening `[`
 * carries the depth of the level it opens and a closing `]` carries the
 * depth of the level it returns to. Records are mutated in place.
 *
 * @param {Array<Object>} charArr - records with a `character` property.
 * @returns {Array<Object>} new array of the same records, each with `depth` set.
 */
function markDepth (charArr) {
  var nesting = 0;

  return charArr.map(function (entry) {
    switch (entry.character) {
      case '[':
        nesting += 1;
        break;
      case ']':
        nesting -= 1;
        break;
    }
    entry.depth = nesting;
    return entry;
  });
}
/**
 * Flags every character record that lies inside a quoted string literal.
 *
 * A character is marked `isString: true` only when the scanner was inside a
 * string before reading it AND is still inside one afterwards. As a result
 * the opening and closing quote characters themselves are marked `false`,
 * while a quote of the other kind nested inside a string is marked `true`.
 *
 * A backslash inside a string escapes the following character, so an escaped
 * quote (e.g. the middle quote in 'it\'s') no longer terminates the string.
 *
 * Records are mutated in place.
 *
 * @param {Array<Object>} charArr - records with a `character` property.
 * @returns {Array<Object>} new array of the same records, each with `isString` set.
 */
function markStrings (charArr) {
  var insideSingleQuote = false;
  var insideDoubleQuote = false;
  // true when the previous in-string character was an unescaped backslash
  var escaped = false;

  function isString () {
    return insideDoubleQuote || insideSingleQuote;
  }

  return charArr.map(function (item) {
    var wasString = isString();

    if (escaped) {
      // this character is escaped: it is plain string content, never a delimiter
      escaped = false;
    } else if (wasString && item.character === '\\') {
      escaped = true;
    } else if (item.character === '"' && !insideSingleQuote) {
      insideDoubleQuote = !insideDoubleQuote;
    } else if (item.character === '\'' && !insideDoubleQuote) {
      insideSingleQuote = !insideSingleQuote;
    }

    // requiring both states keeps the delimiting quotes out of the string
    item.isString = wasString && isString();
    return item;
  });
}
/**
 * Explodes a string into an array of single-character records.
 *
 * @param {string} str - text to split.
 * @returns {Array<{character: string}>} one record per element of `str.split('')`.
 */
function splitStr (str) {
  var pieces = str.split('');
  var records = [];

  for (var i = 0; i < pieces.length; i++) {
    records.push({ character: pieces[i] });
  }

  return records;
}
/**
 * Drops `[` and `]` records that are not part of a string literal.
 *
 * @param {Array<Object>} charArr - records with `character` and `isString`.
 * @returns {Array<Object>} new array without the non-string bracket records.
 */
function stripBrackets (charArr) {
  return charArr.filter(function (entry) {
    if (entry.isString) {
      // brackets inside a string literal are ordinary content
      return true;
    }
    return entry.character !== '[' && entry.character !== ']';
  });
}
/**
 * Drops single- and double-quote records that are not part of a string literal
 * (i.e. the delimiters themselves, which are marked `isString: false`).
 *
 * @param {Array<Object>} charArr - records with `character` and `isString`.
 * @returns {Array<Object>} new array without the non-string quote records.
 */
function stripQuotes (charArr) {
  return charArr.filter(function (entry) {
    if (entry.isString) {
      // a quote nested inside a string literal is ordinary content
      return true;
    }
    return entry.character !== '\'' && entry.character !== '"';
  });
}
/**
 * Groups annotated character records into tokens.
 *
 * A token ends after the current character when any of these holds:
 *   - there is no following character (end of input),
 *   - the following character sits at a different bracket depth,
 *   - the current character is a `.` outside a string literal.
 *
 * Fix: the previous implementation only collected a character when a next
 * character existed, so the final character was never collected and the
 * trailing token was never emitted (e.g. `first` in `users.first`).
 *
 * @param {Array<Object>} charArr - records with `character`, `depth`, `isString`.
 * @returns {Array<{depth: number, isString: boolean, value: string}>} tokens in
 *   input order; `depth` is taken from the token's first character and
 *   `isString` is true when any of its characters is string content.
 */
function combineProcessedCharArr (charArr) {
  var tokens = [];
  var collector = [];

  // Builds a token from the collected characters and resets the collector.
  function finishToken () {
    tokens.push({
      depth: collector[0].depth,
      isString: collector.some(function (item) {
        return item.isString;
      }),
      value: collector.map(function (item) {
        return item.character;
      }).join('')
    });
    collector = [];
  }

  for (var i = 0; i < charArr.length; i++) {
    var currentChar = charArr[i];
    var nextChar = charArr[i + 1];

    if (!currentChar) {
      continue;
    }

    collector.push(currentChar);

    var atEnd = !nextChar;
    var depthChanges = !atEnd && currentChar.depth !== nextChar.depth;
    var isSeparatorDot = currentChar.character === '.' && !currentChar.isString;

    if (atEnd || depthChanges || isSeparatorDot) {
      finishToken();
    }
  }

  return tokens;
}
/**
 * Runs the full tokenizer pipeline over an expression string.
 *
 * Stages, in order: split into character records, mark indices, mark bracket
 * depth, mark string content, strip non-string brackets, strip non-string
 * quotes, then combine the remaining characters into tokens.
 *
 * @param {string} str - expression source text.
 * @returns {Array<Object>} whatever `combineProcessedCharArr` returns for the
 *   processed characters.
 */
function tokenize (str) {
  var chars = splitStr(str);

  chars = markIndex(chars);
  chars = markDepth(chars);
  chars = markStrings(chars);
  chars = stripBrackets(chars);
  chars = stripQuotes(chars);

  return combineProcessedCharArr(chars);
}
// Tokenize the sample expression held in `str`.
var tokens = tokenize(str);
// console.log(tokens);
// Output recorded by the author for the sample expression:
// NOTE(review): the recorded output contains no token for the trailing
// `first` segment of the expression.
// [ { depth: 0, isString: false, value: 'users' },
// { depth: 1, isString: false, value: 'allUsers' },
// { depth: 2, isString: true, value: '123124' },
// { depth: 1, isString: false, value: '.' },
// { depth: 1, isString: false, value: 'username' },
// { depth: 0, isString: false, value: '.' } ]
//
/**
 * Flags, for every token, whether it is the first token seen at its depth.
 *
 * Fix: the previous implementation ignored its `tokenArr` parameter and
 * iterated the global `tokens` variable instead, so it could only ever
 * process that one array.
 *
 * Tokens are mutated in place (an `isFirstAtDepth` property is added).
 *
 * @param {Array<Object>} tokenArr - tokens with a `depth` property.
 * @returns {Array<Object>} new array of the same tokens, each with
 *   `isFirstAtDepth` set.
 */
function markContext (tokenArr) {
  var recordedDepths = {};

  return tokenArr.map(function (token) {
    var alreadySeen = Object.prototype.hasOwnProperty.call(recordedDepths, token.depth);

    recordedDepths[token.depth] = true;
    token.isFirstAtDepth = !alreadySeen;
    return token;
  });
}
// var contextualizedTokens = markContext(tokens);
// [ { depth: 0,
// isString: false,
// value: 'users',
// isFirstAtDepth: true },
// { depth: 1,
// isString: false,
// value: 'allUsers',
// isFirstAtDepth: true },
// { depth: 2,
// isString: true,
// value: '123124',
// isFirstAtDepth: true },
// { depth: 1, isString: false, value: '.', isFirstAtDepth: false },
// { depth: 1,
// isString: false,
// value: 'username',
// isFirstAtDepth: false },
// { depth: 0, isString: false, value: '.', isFirstAtDepth: false } ]
// Second sample: the source text of a string-concatenation expression.
var tstring = "'hello' + ' ' + users['mbildner'].first + ' you are now logged in'";
// The same expression evaluated directly by the JavaScript engine.
var demoString = 'hello' + ' ' + users['mbildner'].first + ' you are now logged in';
// Print the character records after string marking and quote stripping only
// (index marking, depth marking and bracket stripping are not applied here).
console.log(stripQuotes(markStrings(splitStr(tstring))));
// [ { character: 'h', isString: true },
// { character: 'e', isString: true },
// { character: 'l', isString: true },
// { character: 'l', isString: true },
// { character: 'o', isString: true },
// { character: ' ', isString: false },
// { character: '+', isString: false },
// { character: ' ', isString: false },
// { character: ' ', isString: true },
// { character: ' ', isString: false },
// { character: '+', isString: false },
// { character: ' ', isString: false },
// { character: 'u', isString: false },
// { character: 's', isString: false },
// { character: 'e', isString: false },
// { character: 'r', isString: false },
// { character: 's', isString: false },
// { character: '[', isString: false },
// { character: 'm', isString: true },
// { character: 'b', isString: true },
// { character: 'i', isString: true },
// { character: 'l', isString: true },
// { character: 'd', isString: true },
// { character: 'n', isString: true },
// { character: 'e', isString: true },
// { character: 'r', isString: true },
// { character: ']', isString: false },
// { character: '.', isString: false },
// { character: 'f', isString: false },
// { character: 'i', isString: false },
// { character: 'r', isString: false },
// { character: 's', isString: false },
// { character: 't', isString: false },
// { character: ' ', isString: false },
// { character: '+', isString: false },
// { character: ' ', isString: false },
// { character: ' ', isString: true },
// { character: 'y', isString: true },
// { character: 'o', isString: true },
// { character: 'u', isString: true },
// { character: ' ', isString: true },
// { character: 'a', isString: true },
// { character: 'r', isString: true },
// { character: 'e', isString: true },
// { character: ' ', isString: true },
// { character: 'n', isString: true },
// { character: 'o', isString: true },
// { character: 'w', isString: true },
// { character: ' ', isString: true },
// { character: 'l', isString: true },
// { character: 'o', isString: true },
// { character: 'g', isString: true },
// { character: 'g', isString: true },
// { character: 'e', isString: true },
// { character: 'd', isString: true },
// { character: ' ', isString: true },
// { character: 'i', isString: true },
// { character: 'n', isString: true } ]
// TODO: write a custom split function that walks the string-marked char array and splits on whitespace that lies outside string literals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment