Created
March 16, 2009 22:05
-
-
Save thomaslang/80119 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Token grammar for the query language, consumed by tokenizeString.
//
// Each entry is keyed by token type. The fields tokenizeString reads are:
//   firstCharacter  - a character must match this to start the token
//   notAllowed      - the first character matching this ends the token
//   reserved        - category -> exact values; a finished token whose value
//                     is listed here is re-typed to that category
//   format          - a finished value that fails this is typed 'UNKNOWN'
//   delimeted       - (sic, spelling is part of the contract) the token runs
//                     until the opening character is seen again
//   singleCharacter - the token is emitted as soon as it starts
//
// Entry order matters: tokenizeString tests every entry against the current
// character and the last matching entry wins, so the catch-all UNKNOWN entry
// has to stay first.
var queryGrammar = {
  UNKNOWN: {
    firstCharacter: /\S/,
    notAllowed: /[\s'"\w\d\(\)]/,
    reserved: {
      WILD_CARD: ['@%'],
      COMPARATOR: ['=', '!=', '<', '<=', '>', '>=']
    }
  },

  WORD: {
    firstCharacter: /[a-zA-Z_]/,
    notAllowed: /[^a-zA-Z_0-9]/,
    reserved: {
      BOOLEAN: ['true', 'false', 'YES', 'NO'],
      BOOL_OP: ['NOT', 'AND', 'OR'],
      COMPARATOR: ['BEGINS_WITH', 'ENDS_WITH', 'CONTAINS']
    }
  },

  NUMBER: {
    firstCharacter: /\d/,
    notAllowed: /[^\d\.]/,
    format: /^\d+$|^\d+\.\d+$/
  },

  STRING: {
    firstCharacter: /['"]/,
    delimeted: true
  },

  OPEN_PAREN: {
    firstCharacter: /\(/,
    singleCharacter: true
  },

  CLOSE_PAREN: {
    firstCharacter: /\)/,
    singleCharacter: true
  }
};
// Empty placeholder object; none of the code visible in this file reads it yet.
var queryLogic = {};
// Convenience wrapper: tokenizes a query string with the query grammar.
// Returns whatever tokenizeString returns for (query, queryGrammar).
var tokenizeQuery = function (query) {
  var tokens = tokenizeString(query, queryGrammar);
  return tokens;
};
// Splits a string into a flat array of tokens according to a grammar.
//
// Parameters:
//   inputString - the text to tokenize; null/undefined yields an empty array
//   grammar     - object keyed by token type; each entry may define
//                   firstCharacter  RegExp a character must match to start the token
//                   notAllowed      RegExp matching the first character that ends the token
//                   delimeted       (sic) token runs until the opening character recurs;
//                                   the delimiters are not part of the token value
//                   singleCharacter token consists of exactly one character
//                   format          RegExp the finished value must match, otherwise the
//                                   token is typed 'UNKNOWN'
//                   reserved        map of token type -> array of exact values that are
//                                   re-typed to that token type
//
// Returns an array of { tokenType, tokenValue } objects in input order.
// Currently there is no form of syntax validation: an unterminated delimited
// token is emitted with whatever was collected up to the end of the input.
var tokenizeString = function (inputString, grammar) {
  var tokenList = [];
  var currentTokenType = null;
  var currentTokenValue = null;
  var currentDelimeter = null;
  var c = null;
  var t = null;
  var tokenType = null;
  var i;

  // helper function that adds a token to the tokenList and resets the current token
  var addToken = function (type, value) {
    tokenList.push( {tokenType: type, tokenValue: value} );
    currentTokenType = null;
    currentTokenValue = null;
  };

  // helper function that closes the token being collected: applies the
  // format check and the reserved-value lookup, then emits it
  var finishToken = function () {
    var rules = grammar[currentTokenType];
    var finalType = currentTokenType;
    var reservedType;

    if ( rules.format && !rules.format.test(currentTokenValue) ) {
      finalType = "UNKNOWN";
    }
    if ( rules.reserved ) {
      // the last category listing the value wins
      for ( reservedType in rules.reserved ) {
        if ( rules.reserved[reservedType].indexOf(currentTokenValue) >= 0 ) {
          finalType = reservedType;
        }
      }
    }
    addToken(finalType, currentTokenValue);
  };

  if ( inputString == null ) {
    return tokenList;
  }

  // Stepping through the string:
  for ( i = 0; i < inputString.length; i++ ) {
    c = inputString[i];

    // if currently inside a token
    if ( currentTokenType !== null ) {
      t = grammar[currentTokenType];

      if ( t.delimeted ) {
        // Inside a delimited token every character is consumed here: it is
        // either content or the closing delimiter, which must not be
        // re-scanned as the opening of a new token.
        if ( c === currentDelimeter ) {
          finishToken();
        } else {
          currentTokenValue += c;
        }
        continue;
      }

      if ( !t.notAllowed.test(c) ) {
        // still in token
        currentTokenValue += c;
        continue;
      }

      // c terminated the token but is not part of it; fall through so that
      // it can start the next token
      finishToken();
    }

    // not inside a token: look for one starting at c.
    // Every grammar entry is tested and later entries override earlier ones,
    // which lets a catch-all entry listed first act as the fallback.
    for ( tokenType in grammar ) {
      t = grammar[tokenType];
      if ( t.firstCharacter.test(c) ) {
        // initialize new token
        currentTokenType = tokenType;
        currentTokenValue = c;
        // handling of special cases
        if ( t.delimeted ) {
          currentTokenValue = "";
          currentDelimeter = c;
        }
        if ( t.singleCharacter ) {
          addToken(currentTokenType, currentTokenValue);
        }
      }
    }
  }

  // Flush the token that was still open when the input ended. Without this,
  // a token starting on the last character would be dropped.
  if ( currentTokenType !== null ) {
    finishToken();
  }

  return tokenList;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment