Last active
August 29, 2015 14:04
-
-
Save mbildner/c724fdc04edd635b6db7 to your computer and use it in GitHub Desktop.
clean tokenizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample data used to exercise the tokenizer.
// `allUsers` maps a user id to a record holding that user's username.
var allUsers = {
  '123124': {username: 'mbildner'}
};

// `users` maps a username to that user's profile.
var users = {
  'mbildner': {
    first: 'moshe',
    age: 25
  }
};

// The nested lookup as evaluated directly by the JavaScript engine.
var name = users[allUsers['123124'].username].first;

// Source text of the same lookup; this is the input handed to tokenize().
// (Declared once -- the original declared it twice with the same value.)
var str = "users[allUsers['123124'].username].first";
/**
 * Stamps every character record with its position in the array.
 *
 * The records are modified in place (an `indx` property is added) and a new
 * array holding the same record objects is returned.
 *
 * @param {Array<Object>} charArr character records
 * @returns {Array<Object>} new array containing the same, now indexed, records
 */
function markIndex (charArr) {
  charArr.forEach(function (entry, position) {
    entry.indx = position;
  });

  // Hand back a fresh array wrapping the same objects.
  return charArr.slice();
}
/**
 * Annotates every character record with its bracket nesting depth.
 *
 * An opening `[` raises the depth before it is recorded, so the bracket
 * itself carries the inner depth; a closing `]` lowers the depth before it is
 * recorded, so it carries the outer depth. Brackets that appear inside a
 * quoted string literal are plain text and do not affect the depth.
 *
 * Records are modified in place (a `depth` property is added) and returned in
 * a new array.
 *
 * @param {Array<Object>} charArr records with a one-character `character` property
 * @returns {Array<Object>} new array containing the same, now depth-marked, records
 */
function markDepth (charArr) {
  var depth = 0;
  // Quote character that opened the string literal we are inside, or null.
  var openQuote = null;
  // True when the previous character was an unescaped backslash in a string.
  var escaped = false;

  return charArr.map(function (item) {
    var ch = item.character;

    if (openQuote !== null) {
      // Inside a string literal: only look for the end of the string.
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === openQuote) {
        openQuote = null;
      }
    } else if (ch === '\'' || ch === '"') {
      openQuote = ch;
    } else if (ch === '[') {
      depth++;
    } else if (ch === ']') {
      depth--;
    }

    item.depth = depth;
    return item;
  });
}
/**
 * Flags every character record that is part of a string literal's content.
 *
 * The opening and closing quote characters themselves are marked
 * `isString: false`; everything between them is marked `isString: true`.
 * A quote of the other kind inside a string (e.g. the `'` in "it's") is
 * content, and so is a backslash-escaped quote (e.g. the `\'` in 'it\'s'),
 * which does not terminate the string.
 *
 * Records are modified in place (an `isString` property is set) and returned
 * in a new array.
 *
 * @param {Array<Object>} charArr records with a one-character `character` property
 * @returns {Array<Object>} new array containing the same, now string-marked, records
 */
function markStrings (charArr) {
  var insideSingleQuote = false;
  var insideDoubleQuote = false;
  // True when the previous character was an unescaped backslash in a string.
  var escaped = false;

  return charArr.map(function (item) {
    var wasString = insideSingleQuote || insideDoubleQuote;

    if (wasString && escaped) {
      // The character after a backslash is literal content, whatever it is.
      escaped = false;
      item.isString = true;
      return item;
    }

    if (wasString && item.character === '\\') {
      escaped = true;
      item.isString = true;
      return item;
    }

    if (item.character === '"' && !insideSingleQuote) {
      insideDoubleQuote = !insideDoubleQuote;
    } else if (item.character === '\'' && !insideDoubleQuote) {
      insideSingleQuote = !insideSingleQuote;
    }

    // A character is string content only if we were inside a string before it
    // (excludes the opening quote) and still are after it (excludes the
    // closing quote).
    item.isString = wasString && (insideSingleQuote || insideDoubleQuote);
    return item;
  });
}
/**
 * Breaks a string into one record per character.
 *
 * @param {string} str text to split
 * @returns {Array<{character: string}>} one `{character}` record per element
 *   produced by `str.split('')`
 */
function splitStr (str) {
  var pieces = str.split('');
  var records = [];

  for (var i = 0; i < pieces.length; i++) {
    records.push({ character: pieces[i] });
  }

  return records;
}
/**
 * Drops square-bracket characters that are not part of a string literal.
 *
 * @param {Array<Object>} charArr records with `character` and `isString`
 * @returns {Array<Object>} new array without the non-string `[` and `]` records
 */
function stripBrackets (charArr) {
  return charArr.filter(function (entry) {
    // String content is always kept, brackets included.
    if (entry.isString) {
      return true;
    }
    return entry.character !== '[' && entry.character !== ']';
  });
}
/**
 * Drops quote characters that are not part of a string literal's content,
 * i.e. the delimiters themselves.
 *
 * @param {Array<Object>} charArr records with `character` and `isString`
 * @returns {Array<Object>} new array without the non-string `'` and `"` records
 */
function stripQuotes (charArr) {
  return charArr.filter(function (entry) {
    // A quote that is string content (e.g. the ' in "it's") is kept.
    if (entry.isString) {
      return true;
    }
    return entry.character !== '\'' && entry.character !== '"';
  });
}
/**
 * Groups marked character records into tokens.
 *
 * A token ends when the next character sits at a different depth, when the
 * input ends, or at a dot that is not string content. A non-string dot is
 * always emitted as a token of its own. Each token carries:
 *   - depth:    depth of its first character
 *   - isString: true if any of its characters is string content
 *   - value:    the characters joined together
 *
 * The final token is flushed at the end of input, so the last segment of an
 * expression (e.g. `first` in `users.first`) is not lost.
 *
 * @param {Array<Object>} charArr records with `character`, `depth` and `isString`
 * @returns {Array<{depth: *, isString: boolean, value: string}>} tokens in input order
 */
function combineProcessedCharArr (charArr) {
  var collector = [];
  var tokens = [];

  // Turns whatever has been collected into a token; no-op when nothing is pending.
  function finishToken () {
    if (collector.length === 0) {
      return;
    }

    tokens.push({
      depth: collector[0].depth,
      isString: collector.some(function (item) {
        return item.isString;
      }),
      value: collector.map(function (item) {
        return item.character;
      }).join('')
    });

    // clear the collector
    collector.length = 0;
  }

  for (var i = 0; i < charArr.length; i++) {
    var currentChar = charArr[i];
    var nextChar = charArr[i + 1];

    if (!currentChar) {
      continue;
    }

    var isDot = currentChar.character === '.' && !currentChar.isString;

    if (isDot) {
      // Close the identifier before the dot so the dot stands alone.
      finishToken();
    }

    collector.push(currentChar);

    // `!nextChar` flushes the trailing token at the end of the input.
    if (isDot || !nextChar || currentChar.depth !== nextChar.depth) {
      finishToken();
    }
  }

  return tokens;
}
/**
 * Runs the full pipeline that turns an expression string into tokens.
 *
 * Stages, in order: split into character records, stamp indices, mark bracket
 * depth, mark string content, drop non-string brackets, drop non-string
 * quotes, then group the remaining characters into tokens.
 *
 * @param {string} str expression source text
 * @returns {Array<Object>} the tokens produced by combineProcessedCharArr
 */
function tokenize (str) {
  var chars = splitStr(str);
  var indexed = markIndex(chars);
  var withDepth = markDepth(indexed);
  var withStrings = markStrings(withDepth);
  var withoutBrackets = stripBrackets(withStrings);
  var withoutQuotes = stripQuotes(withoutBrackets);

  return combineProcessedCharArr(withoutQuotes);
}
// Tokenize the sample expression held in `str`.
var tokens = tokenize(str);

// Output recorded by the author (from `console.log(tokens)`):
// [ { depth: 0, isString: false, value: 'users' },
// { depth: 1, isString: false, value: 'allUsers' },
// { depth: 2, isString: true, value: '123124' },
// { depth: 1, isString: false, value: '.' },
// { depth: 1, isString: false, value: 'username' },
// { depth: 0, isString: false, value: '.' } ]
//
// NOTE(review): the recorded listing contains no token for the trailing
// `first` segment of the expression.
/**
 * Flags, for every token, whether it is the first token seen at its depth.
 *
 * Tokens are modified in place (an `isFirstAtDepth` boolean is set) and
 * returned in a new array. Operates on the array passed in, not on the
 * module-level `tokens` variable.
 *
 * @param {Array<Object>} tokenArr tokens carrying a `depth` property
 * @returns {Array<Object>} new array containing the same, now flagged, tokens
 */
function markContext (tokenArr) {
  var recordedDepths = {};

  return tokenArr.map(function (token) {
    var seenBefore = Object.prototype.hasOwnProperty.call(recordedDepths, token.depth);

    recordedDepths[token.depth] = true;
    token.isFirstAtDepth = !seenBefore;
    return token;
  });
}
// var contextualizedTokens = markContext(tokens); | |
// [ { depth: 0, | |
// isString: false, | |
// value: 'users', | |
// isFirstAtDepth: true }, | |
// { depth: 1, | |
// isString: false, | |
// value: 'allUsers', | |
// isFirstAtDepth: true }, | |
// { depth: 2, | |
// isString: true, | |
// value: '123124', | |
// isFirstAtDepth: true }, | |
// { depth: 1, isString: false, value: '.', isFirstAtDepth: false }, | |
// { depth: 1, | |
// isString: false, | |
// value: 'username', | |
// isFirstAtDepth: false }, | |
// { depth: 0, isString: false, value: '.', isFirstAtDepth: false } ] | |
// Demo input: the source text of a string-concatenation expression.
var tstring = "'hello' + ' ' + users['mbildner'].first + ' you are now logged in'";

// The same expression evaluated directly by the JavaScript engine.
var demoString = 'hello' + ' ' + users['mbildner'].first + ' you are now logged in';

// Print the string-marked characters of the demo input with the quote
// delimiters removed (brackets are left in; no depth is computed here).
console.log(
  stripQuotes(
    markStrings(
      splitStr(tstring)
    )
  )
);
// [ { character: 'h', isString: true }, | |
// { character: 'e', isString: true }, | |
// { character: 'l', isString: true }, | |
// { character: 'l', isString: true }, | |
// { character: 'o', isString: true }, | |
// { character: ' ', isString: false }, | |
// { character: '+', isString: false }, | |
// { character: ' ', isString: false }, | |
// { character: ' ', isString: true }, | |
// { character: ' ', isString: false }, | |
// { character: '+', isString: false }, | |
// { character: ' ', isString: false }, | |
// { character: 'u', isString: false }, | |
// { character: 's', isString: false }, | |
// { character: 'e', isString: false }, | |
// { character: 'r', isString: false }, | |
// { character: 's', isString: false }, | |
// { character: '[', isString: false }, | |
// { character: 'm', isString: true }, | |
// { character: 'b', isString: true }, | |
// { character: 'i', isString: true }, | |
// { character: 'l', isString: true }, | |
// { character: 'd', isString: true }, | |
// { character: 'n', isString: true }, | |
// { character: 'e', isString: true }, | |
// { character: 'r', isString: true }, | |
// { character: ']', isString: false }, | |
// { character: '.', isString: false }, | |
// { character: 'f', isString: false }, | |
// { character: 'i', isString: false }, | |
// { character: 'r', isString: false }, | |
// { character: 's', isString: false }, | |
// { character: 't', isString: false }, | |
// { character: ' ', isString: false }, | |
// { character: '+', isString: false }, | |
// { character: ' ', isString: false }, | |
// { character: ' ', isString: true }, | |
// { character: 'y', isString: true }, | |
// { character: 'o', isString: true }, | |
// { character: 'u', isString: true }, | |
// { character: ' ', isString: true }, | |
// { character: 'a', isString: true }, | |
// { character: 'r', isString: true }, | |
// { character: 'e', isString: true }, | |
// { character: ' ', isString: true }, | |
// { character: 'n', isString: true }, | |
// { character: 'o', isString: true }, | |
// { character: 'w', isString: true }, | |
// { character: ' ', isString: true }, | |
// { character: 'l', isString: true }, | |
// { character: 'o', isString: true }, | |
// { character: 'g', isString: true }, | |
// { character: 'g', isString: true }, | |
// { character: 'e', isString: true }, | |
// { character: 'd', isString: true }, | |
// { character: ' ', isString: true }, | |
// { character: 'i', isString: true }, | |
// { character: 'n', isString: true } ] | |
// TODO: write a custom split function that reads through the string-marked character array and splits on whitespace that is not part of a string.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment