thomaslang · March 16, 2009 22:05
diff --git a/gistfile1.js b/gistfile1.js
 // Token grammar consumed by tokenizeString. Each entry describes one token
 // type:
 //   firstCharacter  - regex a character must match to start the token
 //   notAllowed      - regex matching the first character that ends the token
 //   reserved        - map of token type -> exact values that get re-typed
 //   format          - regex the finished value must match, else UNKNOWN
 //   delimeted       - token runs until the opening character repeats
 //                     (key spelling is what tokenizeString reads)
 //   singleCharacter - token is complete after its first character
 //
 // Entry order matters: tokenizeString tests every entry and the later match
 // wins, so the catch-all UNKNOWN entry has to stay first.
 var queryGrammar = {

   // Anything non-blank that no other entry claims (operators, wildcards).
   UNKNOWN: {
     firstCharacter: /\S/,
     notAllowed: /[\s'"\w\d\(\)]/,
     reserved: {
       WILD_CARD: ['@%'],
       COMPARATOR: ['=', '!=', '<', '<=', '>', '>=']
     }
   },

   // Identifiers and keyword-like words.
   WORD: {
     firstCharacter: /[a-zA-Z_]/,
     notAllowed: /[^a-zA-Z_0-9]/,
     reserved: {
       BOOLEAN: ['true', 'false', 'YES', 'NO'],
       BOOL_OP: ['NOT', 'AND', 'OR'],
       COMPARATOR: ['BEGINS_WITH', 'ENDS_WITH', 'CONTAINS']
     }
   },

   // Integers or decimals with digits on both sides of the point.
   NUMBER: {
     firstCharacter: /\d/,
     notAllowed: /[^\d\.]/,
     format: /^\d+$|^\d+\.\d+$/
   },

   // Single- or double-quoted text; the quotes are not part of the value.
   STRING: {
     firstCharacter: /['"]/,
     delimeted: true
   },

   OPEN_PAREN: {
     firstCharacter: /\(/,
     singleCharacter: true
   },

   CLOSE_PAREN: {
     firstCharacter: /\)/,
     singleCharacter: true
   }

 };


 // Empty placeholder object; nothing in the visible code reads it yet.
 var queryLogic = {};


 // Convenience wrapper: tokenizes `query` with the built-in queryGrammar and
 // returns whatever tokenizeString returns for it.
 var tokenizeQuery = function (query) {
   var tokens = tokenizeString(query, queryGrammar);
   return tokens;
 };


 // Splits `inputString` into an array of tokens according to `grammar`.
 //
 // inputString - the text to tokenize
 // grammar     - object mapping token type -> rule. A rule may have:
 //                 firstCharacter  regex that starts a token of this type
 //                 notAllowed      regex for the first character that ends it
 //                 delimeted       token ends when its opening character
 //                                 repeats; the delimiters are not kept
 //                 singleCharacter token is complete after one character
 //                 format          regex the finished value must match,
 //                                 otherwise the token is typed "UNKNOWN"
 //                 reserved        map of token type -> exact values that
 //                                 re-type the finished token
 //
 // Returns an array of {tokenType, tokenValue} objects in input order.
 // There is no syntax validation: an unterminated delimited token is
 // emitted with whatever was collected so far.
 var tokenizeString = function (inputString, grammar) {

   var tokenList         = [];
   var currentTokenType  = null;
   var currentTokenValue = null;
   var currentDelimeter  = null;
   var skipThisCharacter = false;
   var endOfToken        = false;
   var c, t, tokenType, i;

   // Pushes a token and resets the "currently inside a token" state.
   var addToken = function (type, value) {
     tokenList.push( {tokenType: type, tokenValue: value} );
     currentTokenType  = null;
     currentTokenValue = null;
   };

   // Applies the format/reserved rules of the token in progress and emits it.
   var finishToken = function () {
     var rule      = grammar[currentTokenType];
     var finalType = currentTokenType;
     var reservedType;

     if ( rule.format && !rule.format.test(currentTokenValue) ) {
       finalType = "UNKNOWN";
     }
     if ( rule.reserved ) {
       for ( reservedType in rule.reserved ) {
         if ( rule.reserved[reservedType].indexOf(currentTokenValue) >= 0 ) {
           finalType = reservedType;
         }
       }
     }
     addToken(finalType, currentTokenValue);
   };

   for (i = 0; i < inputString.length; i++) {

     c = inputString.charAt(i);

     // Set when a closing delimiter was consumed, so that it is not
     // mistaken for the start of the next token.
     skipThisCharacter = false;

     // Inside a token: either extend it or close it.
     if ( currentTokenType ) {
       t = grammar[currentTokenType];
       endOfToken = t.delimeted ? (c === currentDelimeter) : t.notAllowed.test(c);

       if ( endOfToken ) {
         skipThisCharacter = !!t.delimeted;
         finishToken();
       } else {
         currentTokenValue += c;
       }
     }

     // Not inside a token: see whether this character starts one. Every
     // rule of the grammar that was passed in is tested and the later match
     // wins, which is why a catch-all rule has to come first.
     if ( !currentTokenType && !skipThisCharacter ) {
       for ( tokenType in grammar ) {
         t = grammar[tokenType];

         if ( t.firstCharacter.test(c) ) {
           currentTokenType  = tokenType;
           currentTokenValue = c;

           if ( t.delimeted ) {
             // The opening delimiter is remembered but not stored.
             currentTokenValue = "";
             currentDelimeter  = c;
           }
           if ( t.singleCharacter ) {
             addToken(currentTokenType, currentTokenValue);
           }
         }
       }
     }
   }

   // Flush the token still in progress when the input ends; without this a
   // token that starts on the very last character would be lost.
   if ( currentTokenType ) finishToken();

   return tokenList;
 };
	// Token grammar consumed by tokenizeString. Each entry describes one token
	// type:
	//   firstCharacter  - regex a character must match to start the token
	//   notAllowed      - regex matching the first character that ends the token
	//   reserved        - map of token type -> exact values that get re-typed
	//   format          - regex the finished value must match, else UNKNOWN
	//   delimeted       - token runs until the opening character repeats
	//                     (key spelling is what tokenizeString reads)
	//   singleCharacter - token is complete after its first character
	//
	// Entry order matters: tokenizeString tests every entry and the later match
	// wins, so the catch-all UNKNOWN entry has to stay first.
	var queryGrammar = {

		// Anything non-blank that no other entry claims (operators, wildcards).
		'UNKNOWN' : {
			firstCharacter : /\S/,
			notAllowed : /[\s'"\w\d\(\)]/,
			reserved : {
				'WILD_CARD' : ['@%'],
				'COMPARATOR' : ['=','!=','<','<=','>','>=']
			}
		},

		// Identifiers and keyword-like words.
		'WORD' : {
			firstCharacter : /[a-zA-Z_]/,
			notAllowed : /[^a-zA-Z_0-9]/,
			reserved : {
				'BOOLEAN' : ['true','false','YES','NO'],
				'BOOL_OP' : ['NOT','AND','OR'],
				'COMPARATOR' : ['BEGINS_WITH','ENDS_WITH','CONTAINS']
			}
		},

		// Integers or decimals with digits on both sides of the point.
		// The two alternatives must be separated by a real `|`; an escaped
		// `\|` would be a literal pipe and no number could ever match.
		'NUMBER' : {
			firstCharacter : /\d/,
			notAllowed : /[^\d\.]/,
			format : /^\d+$|^\d+\.\d+$/
		},

		// Single- or double-quoted text; the quotes are not part of the value.
		'STRING' : {
			firstCharacter : /['"]/,
			delimeted : true
		},

		'OPEN_PAREN' : {
			firstCharacter : /\(/,
			singleCharacter : true
		},

		'CLOSE_PAREN' : {
			firstCharacter : /\)/,
			singleCharacter : true
		}

	};


	// Empty placeholder object; nothing in the visible code reads it yet.
	var queryLogic = {};


	// Convenience wrapper: tokenizes `query` with the built-in queryGrammar and
	// returns whatever tokenizeString returns for it.
	var tokenizeQuery = function (query) {
		var tokens = tokenizeString(query, queryGrammar);
		return tokens;
	};


	// Splits `inputString` into an array of tokens according to `grammar`.
	//
	// inputString - the text to tokenize
	// grammar     - object mapping token type -> rule. A rule may have:
	//                 firstCharacter  regex that starts a token of this type
	//                 notAllowed      regex for the first character that ends it
	//                 delimeted       token ends when its opening character
	//                                 repeats; the delimiters are not kept
	//                 singleCharacter token is complete after one character
	//                 format          regex the finished value must match,
	//                                 otherwise the token is typed "UNKNOWN"
	//                 reserved        map of token type -> exact values that
	//                                 re-type the finished token
	//
	// Returns an array of {tokenType, tokenValue} objects in input order.
	// There is no syntax validation: an unterminated delimited token is
	// emitted with whatever was collected so far.
	var tokenizeString = function (inputString, grammar) {

		var tokenList = [];
		var currentTokenType = null;
		var currentTokenValue = null;
		var currentDelimeter = null;
		var skipThisCharacter = false;
		var endOfToken = false;
		var c, t, tokenType, i;

		// Pushes a token and resets the "currently inside a token" state.
		var addToken = function (type, value) {
			tokenList.push( {tokenType: type, tokenValue: value} );
			currentTokenType = null;
			currentTokenValue = null;
		};

		// Applies the format/reserved rules of the token in progress and emits it.
		var finishToken = function () {
			var rule = grammar[currentTokenType];
			var finalType = currentTokenType;
			var reservedType;

			if ( rule.format && !rule.format.test(currentTokenValue) ) {
				finalType = "UNKNOWN";
			}
			if ( rule.reserved ) {
				for ( reservedType in rule.reserved ) {
					if ( rule.reserved[reservedType].indexOf(currentTokenValue) >= 0 ) {
						finalType = reservedType;
					}
				}
			}
			addToken(finalType, currentTokenValue);
		};

		for (i = 0; i < inputString.length; i++) {

			c = inputString.charAt(i);

			// Set when a closing delimiter was consumed, so that it is not
			// mistaken for the start of the next token.
			skipThisCharacter = false;

			// Inside a token: either extend it or close it.
			if ( currentTokenType ) {
				t = grammar[currentTokenType];
				endOfToken = t.delimeted ? (c === currentDelimeter) : t.notAllowed.test(c);

				if ( endOfToken ) {
					skipThisCharacter = !!t.delimeted;
					finishToken();
				} else {
					currentTokenValue += c;
				}
			}

			// Not inside a token: see whether this character starts one. Every
			// rule of the grammar that was passed in is tested and the later
			// match wins, which is why a catch-all rule has to come first.
			if ( !currentTokenType && !skipThisCharacter ) {
				for ( tokenType in grammar ) {
					t = grammar[tokenType];

					if ( t.firstCharacter.test(c) ) {
						currentTokenType = tokenType;
						currentTokenValue = c;

						if ( t.delimeted ) {
							// The opening delimiter is remembered but not stored.
							currentTokenValue = "";
							currentDelimeter = c;
						}
						if ( t.singleCharacter ) {
							addToken(currentTokenType, currentTokenValue);
						}
					}
				}
			}
		}

		// Flush the token still in progress when the input ends; without this a
		// token that starts on the very last character would be lost.
		if ( currentTokenType ) finishToken();

		return tokenList;
	};