Skip to content

Instantly share code, notes, and snippets.

@thomaslang
Created March 18, 2009 16:35
Show Gist options
  • Save thomaslang/81235 to your computer and use it in GitHub Desktop.
Save thomaslang/81235 to your computer and use it in GitHub Desktop.
/*
Copy this code into the console and execute t=tree(q) for a start
Check the tree t with t[0].leftSide, t[0].rightSide.leftSide and so on...
Known problems:
-Tokenizer doesn't recognise tokens of lengh 1 at the end of the query string
-Boolean NOT doesn't work
-All logic patterns are still only in general binary form
*/
var q = "name = 'John' AND ( year < 2008 OR check = 'nsjs')";
function tree (queryString) {
return buildTokenTree(tokenizeQuery(queryString));
};
var queryGrammar = {
'UNKNOWN' : {
firstCharacter : /\S/,
notAllowed : /[\s'"\w\d\(\)]/,
reserved : {
'WILD_CARD' : ['@%'],
'COMPARATOR' : ['=','!=','<','<=','>','>=']
}},
'WORD' : {
firstCharacter : /[a-zA-Z_]/,
notAllowed : /[^a-zA-Z_0-9]/,
reserved : {
'BOOLEAN' : ['true','false','YES','NO'],
'BOOL_OP' : ['NOT','AND','OR'],
'COMPARATOR' : ['BEGINS_WITH','ENDS_WITH','CONTAINS']
}},
'NUMBER' : {
firstCharacter : /\d/,
notAllowed : /[^\d\.]/,
format : /^\d+$|^\d+\.\d+$/
},
'STRING' : {
firstCharacter : /['"]/,
delimeted : true
},
'OPEN_PAREN' : {
firstCharacter : /\(/,
singleCharacter : true
},
'CLOSE_PAREN' : {
firstCharacter : /\)/,
singleCharacter : true
}
};
var tokenizeQuery = function (query) {return tokenizeString(query, queryGrammar)};
var tokenizeString = function (inputString, grammar) {
// takes a string and returns an array of tokens
// depending on the grammar specified
// currently there is no form of syntax validation !
var tokenList = [];
var c = null;
var t = null;
var tokenType = null;
var currentTokenType = null;
var currentTokenValue = null;
var currentDelimeter = null;
var endOfString = false;
var belongsToToken = false;
var skipThisCharacter = false;
// helper function that adds tokens to the tokenList
var addToken = function (tokenType, tokenValue) {
// push token to list
tokenList.push( {tokenType: tokenType, tokenValue: tokenValue} );
// and clean up currentToken
currentTokenType = null;
currentTokenValue = null;
};
// Stepping through the string:
for (var i=0; i < inputString.length; i++) {
// current character
c = inputString[i];
// set true after end of delimeted token so that final delimeter is not catched again
skipThisCharacter = false;
//if ( i == inputString.length-1 ) endOfString = true;
// if currently inside a token
if ( currentTokenType ) {
// some helpers
t = grammar[currentTokenType];
endOfToken = (t.delimeted) ? (c==currentDelimeter) : (t.notAllowed.test(c));
endOfString = (i==inputString.length-1);
// if stil in token
if ( !endOfToken ) currentTokenValue += c;
// if end of token reached
if ( endOfToken || endOfString ) {
// handling of special cases
if ( t.format && !t.format.test(currentTokenValue) )
currentTokenType = "UNKNOWN";
if ( t.delimeted )
skipThisCharacter = true;
if ( t.reserved ) {
for ( tokenType in t.reserved ) {
if ( t.reserved[tokenType].indexOf(currentTokenValue) >= 0 )
currentTokenType = tokenType;
}
};
// add token to tokenList
addToken(currentTokenType, currentTokenValue);
}
};
// if not inside a token, look for next one
if ( !currentTokenType && !skipThisCharacter ) {
for ( tokenType in grammar ) {
t = grammar[tokenType];
if ( t.firstCharacter.test(c) ) {
// initialize new token
currentTokenType = tokenType;
currentTokenValue = c;
// handling of special cases
if ( t.delimeted ) {
currentTokenValue = "";
currentDelimeter = c;
};
if ( t.singleCharacter )
addToken(currentTokenType, currentTokenValue);
}
}
};
};
return tokenList;
};
var queryLogic = {
'STRING' : {
evalType : 'PRIMITIVE'
},
'WORD' : {
evalType : 'PRIMITIVE'
},
'NUMBER' : {
evalType : 'PRIMITIVE'
},
'COMPARATOR' : {
leftType : 'PRIMITIVE',
rightType : 'PRIMITIVE',
evalType : 'BOOLEAN'
},
'BOOL_OP' : {
leftType : 'BOOLEAN',
rightType : 'BOOLEAN',
evalType : 'BOOLEAN',
exceptions : {
'NOT' : {
leftType : false
}}}
};
var buildTokenTree = function (tokenList) {
var l = tokenList;
var i = 0;
var openParenthesisStack = [];
//var booleanOperatorStack = [];
var shouldCheckAgain = false;
// some helper functions
function tokenLogic (position) {
var p = position;
if ( p < 0 ) return false;
return queryLogic[l[p].tokenType];
};
function expectedType (side, position) {
var p = position;
var tl = tokenLogic(p);
if ( !tl ) return false;
if (side == 'left') return tl.leftType;
if (side == 'right') return tl.rightType;
};
function evalType (position) {
var p = position;
var tl = tokenLogic(p);
if ( !tl ) return false;
else return tl.evalType;
};
function removeToken (position) {
l.splice(position, 1);
if ( position <= i ) i--;
};
function preceedingTokenExists (position) {
var p = position || i;
if ( p > 0 ) return true;
else return false;
};
function tokenIsMissingChilds (position) {
var p = position;
if ( p < 0 ) return true;
if (( expectedType('left',p) && !l[p].leftSide )
|| ( expectedType('right',p) && !l[p].rightSide ))
return true;
else return false;
};
function typesAreMatching (parent, child) {
var side = (child < parent) ? 'left' : 'right';
if ( parent < 0 || child < 0 ) return false;
if ( !expectedType(side,parent) ) return false;
if ( !evalType(child) ) return false;
else return (expectedType(side,parent) == evalType(child));
};
function preceedingTokenCanBeMadeChild (position) {
var p = position;
if ( !tokenIsMissingChilds(p) ) return false;
if ( !preceedingTokenExists(p) ) return false;
if ( typesAreMatching(p,p-1) ) return true;
else return false;
};
function preceedingTokenCanBeMadeParent (position) {
var p = position;
if ( tokenIsMissingChilds(p) ) return false;
if ( !preceedingTokenExists(p) ) return false;
if ( !tokenIsMissingChilds(p-1) ) return false;
if ( typesAreMatching(p-1,p) ) return true;
else return false;
};
function makeChild (position) {
var p = position;
if (p<1) return false;
l[p].leftSide = l[p-1];
removeToken(p-1);
};
function makeParent (position) {
var p = position;
if (p<1) return false;
l[p-1].rightSide = l[p];
removeToken(p);
};
function removeParenthesesPair () {
removeToken(i);
removeToken(openParenthesisStack.pop());
};
// step through the tokenList
for (i=0; i < l.length; i++) {
shouldCheckAgain = false;
if ( l[i].tokenType == 'OPEN_PAREN' ) openParenthesisStack.push(i);
if ( l[i].tokenType == 'CLOSE_PAREN' ) removeParenthesesPair();
if ( preceedingTokenCanBeMadeChild(i) ) makeChild(i);
if ( preceedingTokenCanBeMadeParent(i) ) { makeParent(i);
shouldCheckAgain = true; };
if ( shouldCheckAgain ) i--;
};
return tokenList;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment