thomaslang · March 18, 2009 16:35
diff --git a/gistfile1.js b/gistfile1.js
 /*

 Copy this code into the console and execute t=tree(q) for a start
 Check the tree t with t[0].leftSide, t[0].rightSide.leftSide and so on...

 Known problems:
 -Tokenizer doesn't recognise tokens of lengh 1 at the end of the query string
 -Boolean NOT doesn't work
 -All logic patterns are still only in general binary form

 */

 var q = "name = 'John' AND ( year < 2008 OR check = 'nsjs')";

 function tree (queryString) {
  return buildTokenTree(tokenizeQuery(queryString));
 };



 var queryGrammar = {
  
  'UNKNOWN'         : {
    firstCharacter  : /\S/,
    notAllowed      : /[\s'"\w\d\(\)]/,
    reserved        : {
      'WILD_CARD'   : ['@%'],
      'COMPARATOR'  : ['=','!=','<','<=','>','>=']
                    }},
  
  'WORD'            : {
    firstCharacter  : /[a-zA-Z_]/,
    notAllowed      : /[^a-zA-Z_0-9]/,
    reserved        : {
      'BOOLEAN'     : ['true','false','YES','NO'],
      'BOOL_OP'     : ['NOT','AND','OR'],
      'COMPARATOR'  : ['BEGINS_WITH','ENDS_WITH','CONTAINS']
                    }},
  
  'NUMBER'          : {
    firstCharacter  : /\d/,
    notAllowed      : /[^\d\.]/,
    format          : /^\d+$|^\d+\.\d+$/
                    },
    
  'STRING'          : {
    firstCharacter  : /['"]/,
    delimeted       : true
                    },
  
  'OPEN_PAREN'      : {
    firstCharacter  : /\(/,
    singleCharacter : true
                    },
  
  'CLOSE_PAREN'     : {
    firstCharacter  : /\)/,
    singleCharacter : true
                    }
                    
 };




 var tokenizeQuery = function (query) {return tokenizeString(query, queryGrammar)};


 var tokenizeString = function (inputString, grammar) {
 	
 	// takes a string and returns an array of tokens
 	// depending on the grammar specified
 	
 	// currently there is no form of syntax validation !
 	
 	
  var tokenList           = [];
  var c                   = null;
 	var t                   = null;
  var tokenType           = null;
  var currentTokenType    = null;
  var currentTokenValue   = null;
  var currentDelimeter    = null;
  var endOfString         = false;
  var belongsToToken      = false;
  var skipThisCharacter   = false;
  
  
  // helper function that adds tokens to the tokenList
  
  var addToken = function (tokenType, tokenValue) {
    // push token to list
    tokenList.push( {tokenType: tokenType, tokenValue: tokenValue} );
    // and clean up currentToken
    currentTokenType  = null;
    currentTokenValue = null;
  };
  
  
  // Stepping through the string:
    
  for (var i=0; i < inputString.length; i++) {
    
    // current character
    c = inputString[i];
    
    // set true after end of delimeted token so that final delimeter is not catched again
    skipThisCharacter = false;
    
    //if ( i == inputString.length-1 ) endOfString = true;
    
    
    // if currently inside a token
    
    if ( currentTokenType ) {
      
      // some helpers
      t           = grammar[currentTokenType];
      endOfToken  = (t.delimeted) ? (c==currentDelimeter) : (t.notAllowed.test(c));
      endOfString = (i==inputString.length-1);
      
      // if stil in token
      if ( !endOfToken ) currentTokenValue += c;
      
      // if end of token reached
      if ( endOfToken || endOfString ) {
        
        // handling of special cases
        if ( t.format && !t.format.test(currentTokenValue) ) 
          currentTokenType = "UNKNOWN";
        if ( t.delimeted ) 
          skipThisCharacter = true;
        if ( t.reserved ) {
          for ( tokenType in t.reserved ) {
            if ( t.reserved[tokenType].indexOf(currentTokenValue) >= 0 ) 
              currentTokenType = tokenType;
          }
        };
        
        // add token to tokenList
        addToken(currentTokenType, currentTokenValue);
      }
      
    };
    
 
    
    // if not inside a token, look for next one
    
    if ( !currentTokenType && !skipThisCharacter ) {
      for ( tokenType in grammar ) {
        t = grammar[tokenType];
        
        if ( t.firstCharacter.test(c) ) {
          
          // initialize new token
          currentTokenType = tokenType;
          currentTokenValue = c;
          
          // handling of special cases
          if ( t.delimeted ) {
            currentTokenValue = "";
            currentDelimeter = c;
          };
          if ( t.singleCharacter )
            addToken(currentTokenType, currentTokenValue);
          
        }
      }
      
    };
    
  };
  
  return tokenList;
 };



 var queryLogic = {
  'STRING'      : {
    evalType    : 'PRIMITIVE'
                },
  
  'WORD'        : {
    evalType    : 'PRIMITIVE'
                },
  
  'NUMBER'      : {
    evalType    : 'PRIMITIVE'
                },
                
  'COMPARATOR'  : {
    leftType    : 'PRIMITIVE',
    rightType   : 'PRIMITIVE',
    evalType    : 'BOOLEAN'
                },
                
  'BOOL_OP'     : {
    leftType    : 'BOOLEAN',
    rightType   : 'BOOLEAN',
    evalType    : 'BOOLEAN',
    exceptions  : {
      'NOT'     : {
        leftType : false
                }}}
 };


 var buildTokenTree = function (tokenList) {
  
  var l = tokenList;
  var i = 0;
  var openParenthesisStack = [];
  //var booleanOperatorStack = [];
  var shouldCheckAgain     = false;

  
  
  // some helper functions
  
  function tokenLogic (position) {
    var p = position;
    if ( p < 0 ) return false;
    return queryLogic[l[p].tokenType];
  };
  
  function expectedType (side, position) {
    var p = position;
    var tl = tokenLogic(p);
    if ( !tl )            return false;
    if (side == 'left')   return tl.leftType;
    if (side == 'right')  return tl.rightType;
  };
  
  function evalType (position) {
    var p = position;
    var tl = tokenLogic(p);
    if ( !tl )  return false;
    else        return tl.evalType;
  };
  
  function removeToken (position) {
    l.splice(position, 1);
    if ( position <= i ) i--;
  };
  
  function preceedingTokenExists (position) {
    var p = position || i;
    if ( p > 0 )  return true;
    else          return false;
  };
  
  function tokenIsMissingChilds (position) {
    var p = position;
    if ( p < 0 ) return true;
    if (( expectedType('left',p) && !l[p].leftSide )
            || ( expectedType('right',p) && !l[p].rightSide ))
                  return true;
    else          return false;
  };
  
  function typesAreMatching (parent, child) {
    var side = (child < parent) ? 'left' : 'right';
    if ( parent < 0 || child < 0 )    return false;
    if ( !expectedType(side,parent) ) return false;
    if ( !evalType(child) )           return false;
    else                              return (expectedType(side,parent) == evalType(child));
  };
  
  function preceedingTokenCanBeMadeChild (position) {
    var p = position;
    if ( !tokenIsMissingChilds(p) )   return false;
    if ( !preceedingTokenExists(p) )  return false;
    if ( typesAreMatching(p,p-1) )    return true;
    else                              return false;
  };
  
  function preceedingTokenCanBeMadeParent (position) {
    var p = position;
    if ( tokenIsMissingChilds(p) )    return false;
    if ( !preceedingTokenExists(p) )  return false;
    if ( !tokenIsMissingChilds(p-1) ) return false;
    if ( typesAreMatching(p-1,p) )    return true;
    else                              return false;
  };
  
  function makeChild (position) {
    var p = position;
    if (p<1) return false;
    l[p].leftSide = l[p-1];
    removeToken(p-1);
  };
  
  function makeParent (position) {
    var p = position;
    if (p<1) return false;
    l[p-1].rightSide = l[p];
    removeToken(p);
  };
  
  function removeParenthesesPair () {
    removeToken(i);
    removeToken(openParenthesisStack.pop());
  };
  
  // step through the tokenList
  
  for (i=0; i < l.length; i++) {
    shouldCheckAgain = false;
    
    if ( l[i].tokenType == 'OPEN_PAREN' )       openParenthesisStack.push(i);
    if ( l[i].tokenType == 'CLOSE_PAREN' )      removeParenthesesPair();
    if ( preceedingTokenCanBeMadeChild(i) )     makeChild(i);
    if ( preceedingTokenCanBeMadeParent(i) )  { makeParent(i);
                                                shouldCheckAgain = true; }; 
    if ( shouldCheckAgain )                     i--;
    
    
  };
  
  
  return tokenList;
  
 };
	/*

	Copy this code into the console and execute t=tree(q) for a start
	Check the tree t with t[0].leftSide, t[0].rightSide.leftSide and so on...

	Known problems:
	-Tokenizer doesn't recognise tokens of lengh 1 at the end of the query string
	-Boolean NOT doesn't work
	-All logic patterns are still only in general binary form

	*/

	var q = "name = 'John' AND ( year < 2008 OR check = 'nsjs')";

	function tree (queryString) {
	return buildTokenTree(tokenizeQuery(queryString));
	};



	var queryGrammar = {

	'UNKNOWN' : {
	firstCharacter : /\S/,
	notAllowed : /[\s'"\w\d\(\)]/,
	reserved : {
	'WILD_CARD' : ['@%'],
	'COMPARATOR' : ['=','!=','<','<=','>','>=']
	}},

	'WORD' : {
	firstCharacter : /[a-zA-Z_]/,
	notAllowed : /[^a-zA-Z_0-9]/,
	reserved : {
	'BOOLEAN' : ['true','false','YES','NO'],
	'BOOL_OP' : ['NOT','AND','OR'],
	'COMPARATOR' : ['BEGINS_WITH','ENDS_WITH','CONTAINS']
	}},

	'NUMBER' : {
	firstCharacter : /\d/,
	notAllowed : /[^\d\.]/,
	format : /^\d+$\|^\d+\.\d+$/
	},

	'STRING' : {
	firstCharacter : /['"]/,
	delimeted : true
	},

	'OPEN_PAREN' : {
	firstCharacter : /\(/,
	singleCharacter : true
	},

	'CLOSE_PAREN' : {
	firstCharacter : /\)/,
	singleCharacter : true
	}

	};




	var tokenizeQuery = function (query) {return tokenizeString(query, queryGrammar)};


	var tokenizeString = function (inputString, grammar) {

	// takes a string and returns an array of tokens
	// depending on the grammar specified

	// currently there is no form of syntax validation !


	var tokenList = [];
	var c = null;
	var t = null;
	var tokenType = null;
	var currentTokenType = null;
	var currentTokenValue = null;
	var currentDelimeter = null;
	var endOfString = false;
	var belongsToToken = false;
	var skipThisCharacter = false;


	// helper function that adds tokens to the tokenList

	var addToken = function (tokenType, tokenValue) {
	// push token to list
	tokenList.push( {tokenType: tokenType, tokenValue: tokenValue} );
	// and clean up currentToken
	currentTokenType = null;
	currentTokenValue = null;
	};


	// Stepping through the string:

	for (var i=0; i < inputString.length; i++) {

	// current character
	c = inputString[i];

	// set true after end of delimeted token so that final delimeter is not catched again
	skipThisCharacter = false;

	//if ( i == inputString.length-1 ) endOfString = true;


	// if currently inside a token

	if ( currentTokenType ) {

	// some helpers
	t = grammar[currentTokenType];
	endOfToken = (t.delimeted) ? (c==currentDelimeter) : (t.notAllowed.test(c));
	endOfString = (i==inputString.length-1);

	// if stil in token
	if ( !endOfToken ) currentTokenValue += c;

	// if end of token reached
	if ( endOfToken \|\| endOfString ) {

	// handling of special cases
	if ( t.format && !t.format.test(currentTokenValue) )
	currentTokenType = "UNKNOWN";
	if ( t.delimeted )
	skipThisCharacter = true;
	if ( t.reserved ) {
	for ( tokenType in t.reserved ) {
	if ( t.reserved[tokenType].indexOf(currentTokenValue) >= 0 )
	currentTokenType = tokenType;
	}
	};

	// add token to tokenList
	addToken(currentTokenType, currentTokenValue);
	}

	};



	// if not inside a token, look for next one

	if ( !currentTokenType && !skipThisCharacter ) {
	for ( tokenType in grammar ) {
	t = grammar[tokenType];

	if ( t.firstCharacter.test(c) ) {

	// initialize new token
	currentTokenType = tokenType;
	currentTokenValue = c;

	// handling of special cases
	if ( t.delimeted ) {
	currentTokenValue = "";
	currentDelimeter = c;
	};
	if ( t.singleCharacter )
	addToken(currentTokenType, currentTokenValue);

	}
	}

	};

	};

	return tokenList;
	};



	var queryLogic = {
	'STRING' : {
	evalType : 'PRIMITIVE'
	},

	'WORD' : {
	evalType : 'PRIMITIVE'
	},

	'NUMBER' : {
	evalType : 'PRIMITIVE'
	},

	'COMPARATOR' : {
	leftType : 'PRIMITIVE',
	rightType : 'PRIMITIVE',
	evalType : 'BOOLEAN'
	},

	'BOOL_OP' : {
	leftType : 'BOOLEAN',
	rightType : 'BOOLEAN',
	evalType : 'BOOLEAN',
	exceptions : {
	'NOT' : {
	leftType : false
	}}}
	};


	var buildTokenTree = function (tokenList) {

	var l = tokenList;
	var i = 0;
	var openParenthesisStack = [];
	//var booleanOperatorStack = [];
	var shouldCheckAgain = false;



	// some helper functions

	function tokenLogic (position) {
	var p = position;
	if ( p < 0 ) return false;
	return queryLogic[l[p].tokenType];
	};

	function expectedType (side, position) {
	var p = position;
	var tl = tokenLogic(p);
	if ( !tl ) return false;
	if (side == 'left') return tl.leftType;
	if (side == 'right') return tl.rightType;
	};

	function evalType (position) {
	var p = position;
	var tl = tokenLogic(p);
	if ( !tl ) return false;
	else return tl.evalType;
	};

	function removeToken (position) {
	l.splice(position, 1);
	if ( position <= i ) i--;
	};

	function preceedingTokenExists (position) {
	var p = position \|\| i;
	if ( p > 0 ) return true;
	else return false;
	};

	function tokenIsMissingChilds (position) {
	var p = position;
	if ( p < 0 ) return true;
	if (( expectedType('left',p) && !l[p].leftSide )
	\|\| ( expectedType('right',p) && !l[p].rightSide ))
	return true;
	else return false;
	};

	function typesAreMatching (parent, child) {
	var side = (child < parent) ? 'left' : 'right';
	if ( parent < 0 \|\| child < 0 ) return false;
	if ( !expectedType(side,parent) ) return false;
	if ( !evalType(child) ) return false;
	else return (expectedType(side,parent) == evalType(child));
	};

	function preceedingTokenCanBeMadeChild (position) {
	var p = position;
	if ( !tokenIsMissingChilds(p) ) return false;
	if ( !preceedingTokenExists(p) ) return false;
	if ( typesAreMatching(p,p-1) ) return true;
	else return false;
	};

	function preceedingTokenCanBeMadeParent (position) {
	var p = position;
	if ( tokenIsMissingChilds(p) ) return false;
	if ( !preceedingTokenExists(p) ) return false;
	if ( !tokenIsMissingChilds(p-1) ) return false;
	if ( typesAreMatching(p-1,p) ) return true;
	else return false;
	};

	function makeChild (position) {
	var p = position;
	if (p<1) return false;
	l[p].leftSide = l[p-1];
	removeToken(p-1);
	};

	function makeParent (position) {
	var p = position;
	if (p<1) return false;
	l[p-1].rightSide = l[p];
	removeToken(p);
	};

	function removeParenthesesPair () {
	removeToken(i);
	removeToken(openParenthesisStack.pop());
	};

	// step through the tokenList

	for (i=0; i < l.length; i++) {
	shouldCheckAgain = false;

	if ( l[i].tokenType == 'OPEN_PAREN' ) openParenthesisStack.push(i);
	if ( l[i].tokenType == 'CLOSE_PAREN' ) removeParenthesesPair();
	if ( preceedingTokenCanBeMadeChild(i) ) makeChild(i);
	if ( preceedingTokenCanBeMadeParent(i) ) { makeParent(i);
	shouldCheckAgain = true; };
	if ( shouldCheckAgain ) i--;


	};


	return tokenList;

	};