Created
March 11, 2009 16:28
-
-
Save thomaslang/77549 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parses a query string into a boolean expression and its comparisons.
 *
 * Steps:
 *   1. locate the quoted literal strings in the query,
 *   2. replace each literal with a numbered placeholder,
 *   3. derive the boolean expression from the stripped query,
 *   4. split the stripped query into its individual comparisons.
 *
 * @param {string} queryString - the raw query text.
 * @returns {{booleanExpression: string, comparisons: string[]}}
 *   `booleanExpression` is the result of getBooleanExpression();
 *   `comparisons` currently holds the *stripped* comparisons (literal
 *   strings still replaced by placeholders), see the note below.
 */
function parseQuery(queryString) {
  var literalStrings = getLiteralStrings(queryString);
  var strippedString = stripLiteralStrings(queryString, literalStrings);
  var booleanExpression = getBooleanExpression(strippedString);
  var strippedComparisons = getStrippedComparisons(strippedString);

  // Temporary: constructComparisons() is already wired in, but its result
  // is not returned yet. Once it is finished, `comparisons` below should be
  // returned instead of `strippedComparisons`.
  var comparisons = constructComparisons(strippedComparisons, literalStrings);

  return {
    booleanExpression: booleanExpression,
    comparisons: strippedComparisons
  };
}
/**
 * Scans the query and records every quoted literal string it contains.
 *
 * A literal starts at a single or double quote and ends at the next
 * unescaped occurrence of the same quote character. A backslash escapes the
 * character that follows it, so `\'` does not close a single-quoted literal
 * and `\\` is an escaped backslash that does not escape the next character.
 *
 * @param {string} queryString - the raw query text.
 * @returns {Array<{startPoint: number, endPoint: number, string: string}>}
 *   One entry per literal, in order of appearance:
 *   - `startPoint`: index of the opening quote,
 *   - `endPoint`: index of the closing quote; for a literal that is never
 *     closed this is `queryString.length` (one past the last character),
 *   - `string`: the raw text between the quotes (escape sequences are kept
 *     as written, they are not decoded).
 */
function getLiteralStrings(queryString) {
  var isStringDelimiter = {"'": true, '"': true};
  var escapeCharacter = "\\";
  var literalStrings = []; // to be returned
  var current = null;      // literal being scanned; null while outside one
  var delimiter = "";      // quote character that opened `current`
  var escaped = false;     // true when the previous character was an active backslash
  var character;
  var i;

  for (i = 0; i < queryString.length; i++) {
    character = queryString.charAt(i);

    if (current === null) {
      if (isStringDelimiter[character] === true) {
        current = {startPoint: i};
        delimiter = character;
      }
    } else if (escaped) {
      // This character is escaped; it can neither close the literal nor
      // start a new escape sequence.
      escaped = false;
    } else if (character === escapeCharacter) {
      escaped = true;
    } else if (character === delimiter) {
      current.endPoint = i;
      current.string = queryString.substring(current.startPoint + 1, i);
      literalStrings.push(current);
      current = null;
    }
  }

  if (current !== null) {
    // Unterminated literal: treat the end of the input as its closing point
    // so that every returned entry has a usable endPoint.
    current.endPoint = queryString.length;
    current.string = queryString.substring(current.startPoint + 1);
    literalStrings.push(current);
  }

  return literalStrings;
}
/**
 * Replaces every literal string inside the query with a numbered
 * placeholder of the form `strippedStrings[i]`, where `i` is the index of
 * the literal in `literalStrings`.
 *
 * Motivation: once the literals are gone, the query can be manipulated as
 * plain text without worrying about keywords (AND, for example) or
 * parentheses that merely appear inside a quoted string.
 *
 * @param {string} queryString - the raw query text.
 * @param {Array<{startPoint: number, endPoint: number}>} literalStrings -
 *   positions of the opening (`startPoint`) and closing (`endPoint`) quotes
 *   of each literal, in order of appearance.
 * @returns {string} the query with each literal (quotes included) replaced
 *   by its placeholder.
 */
function stripLiteralStrings(queryString, literalStrings) {
  var stripped = queryString;
  var literal;
  var tail;
  var i;

  // Work from the last literal to the first so that the recorded positions
  // of the literals still to be processed stay valid.
  for (i = literalStrings.length - 1; i >= 0; i--) {
    literal = literalStrings[i];
    // A literal without a numeric endPoint was never closed; it runs to the
    // end of the query, so nothing follows it.
    tail = typeof literal.endPoint === "number"
      ? stripped.substring(literal.endPoint + 1)
      : "";
    stripped = stripped.substring(0, literal.startPoint)
      + "strippedStrings[" + i + "]"
      + tail;
  }

  return stripped;
}
/**
 * Reduces a stripped query to its boolean skeleton.
 *
 * Every comparison (a maximal run of tokens that are not `(`, `)`, `AND`,
 * `OR` or `NOT`) is replaced by a numbered placeholder `a[n]`, counting
 * from 0 in order of appearance. Keywords are case-sensitive and are only
 * recognised as whole whitespace-delimited tokens, so identifiers such as
 * `firstName` or `ORDER` are left intact as part of their comparison.
 *
 * Example: "(x = 1 AND y > 2) OR NOT z = 3" -> "( a[0] AND a[1] ) OR NOT a[2]"
 *
 * @param {string} strippedString - query text whose literal strings have
 *   already been replaced by placeholders.
 * @returns {string} the boolean expression; tokens are separated by single
 *   spaces, with no leading or trailing whitespace.
 */
function getBooleanExpression(strippedString) {
  var booleanToken = /^(?:\(|\)|AND|OR|NOT)$/;
  // Put whitespace around parentheses so they always form tokens of their
  // own, then break the query into whitespace-delimited tokens.
  var tokens = strippedString
    .replace(/\(/g, " ( ")
    .replace(/\)/g, " ) ")
    .split(/\s+/);
  var parts = [];
  var count = 0;
  var insideComparison = false;
  var token;
  var i;

  for (i = 0; i < tokens.length; i++) {
    token = tokens[i];
    if (token === "") {
      // split() yields empty strings for leading/trailing whitespace
      continue;
    }
    if (booleanToken.test(token)) {
      parts.push(token);
      insideComparison = false;
    } else if (!insideComparison) {
      // first token of a new comparison: emit one placeholder for the
      // whole comparison and skip its remaining tokens
      parts.push("a[" + count + "]");
      count++;
      insideComparison = true;
    }
  }

  return parts.join(" ");
}
/**
 * Splits a stripped query into its individual comparisons.
 *
 * The query is cut at every run of boolean tokens (`(`, `)`, `AND`, `OR`,
 * `NOT`; case-sensitive, whitespace-delimited). What remains between those
 * runs are the comparisons, returned in order of appearance.
 *
 * @param {string} strippedString - query text whose literal strings have
 *   already been replaced by placeholders.
 * @returns {string[]} the comparisons, each trimmed of surrounding
 *   whitespace; an empty array when the query contains no comparison.
 */
function getStrippedComparisons(strippedString) {
  var booleanStatements = /\s+(?:(?:\(|\)|AND|OR|NOT)\s+)+/g;
  var strippedComparisons = [];
  var pieces;
  var piece;
  var i;

  // Make sure there is whitespace around parentheses, and pad both ends so
  // that boolean tokens at the very beginning or end of the query are also
  // surrounded by whitespace and therefore matched by the splitter.
  var padded = " "
    + strippedString.replace(/\(/g, " ( ").replace(/\)/g, " ) ")
    + " ";

  pieces = padded.split(booleanStatements);
  for (i = 0; i < pieces.length; i++) {
    piece = pieces[i].replace(/^\s+|\s+$/g, "");
    // Empty pieces come from boolean tokens at the edges or from an empty
    // query; they are not comparisons.
    if (piece !== "") {
      strippedComparisons.push(piece);
    }
  }

  return strippedComparisons;
}
/**
 * Placeholder for building the final comparisons from the stripped
 * comparisons and the literal strings they refer to.
 *
 * Not implemented yet: both arguments are currently ignored and an empty
 * array is always returned.
 *
 * @param {string[]} strippedComparisons - comparisons with literal strings
 *   replaced by placeholders (unused for now).
 * @param {Array<Object>} literalStrings - the literal strings found in the
 *   query (unused for now).
 * @returns {Array} always a new empty array.
 */
function constructComparisons(strippedComparisons, literalStrings) {
  return [];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment