Skip to content

Instantly share code, notes, and snippets.

@thomaslang
Created March 16, 2009 22:05
Show Gist options
  • Save thomaslang/80119 to your computer and use it in GitHub Desktop.
Save thomaslang/80119 to your computer and use it in GitHub Desktop.
// Grammar table for the query language.
//
// Each key is a token type; its value describes how the tokenizer recognises
// tokens of that type:
//   firstCharacter  - regex a character must match to start such a token
//   notAllowed      - regex matching the first character that ends the token
//   reserved        - map of token type -> literal values; a finished token
//                     whose value is listed is re-labelled with that type
//   format          - regex the finished value must match, otherwise the
//                     token is re-labelled 'UNKNOWN'
//   delimeted       - token runs until the opening character occurs again
//                     (property name spelled as the tokenizer reads it)
//   singleCharacter - token is complete after its first character
//
// Entry order matters: when several firstCharacter patterns match the same
// character, the later entry wins, so the catch-all UNKNOWN comes first.
var queryGrammar = {
  // Catch-all for operator-like runs of symbols.
  UNKNOWN: {
    firstCharacter: /\S/,
    notAllowed: /[\s'"\w\d\(\)]/,
    reserved: {
      WILD_CARD: ['@%'],
      COMPARATOR: ['=', '!=', '<', '<=', '>', '>=']
    }
  },

  // Identifiers and keyword-style operators.
  WORD: {
    firstCharacter: /[a-zA-Z_]/,
    notAllowed: /[^a-zA-Z_0-9]/,
    reserved: {
      BOOLEAN: ['true', 'false', 'YES', 'NO'],
      BOOL_OP: ['NOT', 'AND', 'OR'],
      COMPARATOR: ['BEGINS_WITH', 'ENDS_WITH', 'CONTAINS']
    }
  },

  // Integers or decimals with digits on both sides of the point.
  NUMBER: {
    firstCharacter: /\d/,
    notAllowed: /[^\d\.]/,
    format: /^\d+$|^\d+\.\d+$/
  },

  // Single- or double-quoted text.
  STRING: {
    firstCharacter: /['"]/,
    delimeted: true
  },

  OPEN_PAREN: {
    firstCharacter: /\(/,
    singleCharacter: true
  },

  CLOSE_PAREN: {
    firstCharacter: /\)/,
    singleCharacter: true
  }
};
// Currently an empty object; nothing in this file adds entries to it.
var queryLogic = {};
/**
 * Tokenizes a query string with the query-language grammar.
 *
 * @param {string} query - text to split into tokens
 * @returns {Array} whatever tokenizeString returns for (query, queryGrammar)
 */
var tokenizeQuery = function (query) {
  var tokens = tokenizeString(query, queryGrammar);
  return tokens;
};
/**
 * Splits a string into a flat list of tokens according to a grammar table.
 * No syntax validation is performed; characters that start no token
 * (e.g. whitespace between tokens) are silently dropped.
 *
 * Grammar entries are keyed by token type and may define:
 *   firstCharacter  (RegExp)  - a character matching it starts the token
 *   notAllowed      (RegExp)  - the first character matching it ends the token
 *   reserved        (Object)  - token type -> array of literal values; a
 *                               finished token whose value is listed is
 *                               re-labelled with that type
 *   format          (RegExp)  - the finished value must match, otherwise the
 *                               token is re-labelled "UNKNOWN"
 *   delimeted       (Boolean) - token runs until its opening character occurs
 *                               again; the delimiters are not part of the value
 *   singleCharacter (Boolean) - token is complete after one character
 *
 * @param {string} inputString - text to tokenize
 * @param {Object} grammar - grammar table as described above
 * @returns {Array} list of { tokenType, tokenValue } objects in input order
 */
var tokenizeString = function (inputString, grammar) {
  var tokenList = [];
  var currentTokenType = null;
  var currentTokenValue = null;
  var currentDelimeter = null;
  var skipThisCharacter = false;
  var endOfToken = false;
  var tokenType = null;
  var rule = null;
  var c = null;
  var i;

  // Pushes a token onto the result list and leaves the "inside a token" state.
  var addToken = function (type, value) {
    tokenList.push({ tokenType: type, tokenValue: value });
    currentTokenType = null;
    currentTokenValue = null;
  };

  // Applies the format check and reserved-word lookup of the token's own
  // grammar rule to the token being built, then emits it.
  var finishToken = function () {
    var ownRule = grammar[currentTokenType];
    var finalType = currentTokenType;
    var reservedType;

    if (ownRule.format && !ownRule.format.test(currentTokenValue)) {
      finalType = "UNKNOWN";
    }
    if (ownRule.reserved) {
      for (reservedType in ownRule.reserved) {
        if (ownRule.reserved[reservedType].indexOf(currentTokenValue) >= 0) {
          finalType = reservedType;
        }
      }
    }
    addToken(finalType, currentTokenValue);
  };

  for (i = 0; i < inputString.length; i++) {
    c = inputString[i];
    // Set when c is the closing delimiter of a token, so that it is not
    // picked up again as the start of a new token.
    skipThisCharacter = false;

    if (currentTokenType) {
      rule = grammar[currentTokenType];
      endOfToken = rule.delimeted ? (c === currentDelimeter) : rule.notAllowed.test(c);

      if (endOfToken) {
        if (rule.delimeted) {
          skipThisCharacter = true;
        }
        finishToken();
      } else {
        currentTokenValue += c;
      }
    }

    // Not inside a token (possibly because one just ended on c): see whether
    // c starts a new one.
    if (!currentTokenType && !skipThisCharacter) {
      // No early exit on purpose: when several rules match, the later grammar
      // entry overrides the earlier one (a catch-all rule must come first).
      for (tokenType in grammar) {
        rule = grammar[tokenType];
        if (rule.firstCharacter.test(c)) {
          currentTokenType = tokenType;
          currentTokenValue = c;
          if (rule.delimeted) {
            // The opening delimiter is not part of the value.
            currentTokenValue = "";
            currentDelimeter = c;
          }
          if (rule.singleCharacter) {
            addToken(currentTokenType, currentTokenValue);
          }
        }
      }
    }
  }

  // Input ended while a token was still open (this includes a token that
  // started on the very last character and an unterminated delimited token).
  if (currentTokenType) {
    finishToken();
  }

  return tokenList;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment