Last active
August 18, 2017 20:08
-
-
Save thomsbg/dad4ca8484fbc286ef05c44191e7968f to your computer and use it in GitHub Desktop.
hand-rolled query parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parse a search query string into a flat list of tokens.
 *
 * The query is scanned one character at a time. Three kinds of token are
 * produced, in the order they appear in the input:
 *
 *   - `{ type: 'term', value }`            a bare whitespace-delimited word
 *   - `{ type: 'phrase', phrase }`         text wrapped in double quotes
 *   - `{ type: 'keyword', key, value }`    `key:word`
 *   - `{ type: 'keyword', key, phrase }`   `key:"some words"`
 *
 * Lenient handling of malformed input:
 *   - A phrase left unclosed at the end of the input is still emitted
 *     (unless it is empty).
 *   - A key with no value (`by:` followed by whitespace or end of input) is
 *     emitted as a plain term.
 *   - A `:` with nothing before it is ignored.
 *   - A `"` in the middle of a word ends that word as a plain term and opens
 *     a phrase.
 *   - Further `:` characters inside a keyword value are kept as part of the
 *     value (`url:http://x.io` has the value `http://x.io`).
 *
 * @param {string} query - The raw query text.
 * @returns {Array<Object>} The parsed tokens, in input order.
 */
function parse(query) {
  let curTerm = null // bare word being accumulated, or null
  let curPhrase = null // text of the open quoted phrase, or null when none is open
  let curKey = null // keyword key waiting for its value, or null
  const result = []

  for (const char of query) {
    switch (char) {
      case '"':
        if (curPhrase != null) {
          if (curKey != null) {
            // close keyword value phrase
            result.push({ type: 'keyword', key: curKey, phrase: curPhrase })
            curKey = null
          } else {
            // close normal phrase
            result.push({ type: 'phrase', phrase: curPhrase })
          }
          curPhrase = null
        } else {
          if (curTerm != null) {
            // phrase begins in the middle of a word:
            // use the first half of the word as a normal term
            result.push({ type: 'term', value: curTerm })
            curTerm = null
          }
          // open new phrase
          curPhrase = ''
        }
        break

      case ':':
        if (curPhrase != null) {
          // inside a phrase a colon is ordinary text
          curPhrase += char
        } else if (curKey != null) {
          // A key is already pending. Keep the colon as part of the value
          // instead of replacing the key, which would silently drop it.
          // A colon directly after the key separator is still ignored.
          if (curTerm != null) {
            curTerm += char
          }
        } else if (curTerm != null) {
          // curTerm is a keyword key
          curKey = curTerm
          curTerm = null
        }
        // otherwise: a `:` preceded by whitespace is meaningless
        break

      case ' ':
      case '\n':
      case '\t':
        if (curPhrase != null) {
          // add whitespace to an open phrase
          curPhrase += char
        } else if (curKey != null) {
          // whitespace ends the keyword being built
          if (curTerm != null) {
            // use the current term as the value
            result.push({ type: 'keyword', key: curKey, value: curTerm })
          } else {
            // with no value, treat the key as a normal term
            result.push({ type: 'term', value: curKey })
          }
          curTerm = null
          curKey = null
        } else if (curTerm != null) {
          // terms are separated by whitespace
          result.push({ type: 'term', value: curTerm })
          curTerm = null
        }
        break

      default:
        if (curPhrase != null) {
          // if a phrase is open, add to it
          curPhrase += char
        } else if (curTerm != null) {
          // if a term is present, add to it
          curTerm += char
        } else {
          // this is the start of a new term
          curTerm = char
        }
        break
    }
  }

  // Flush whatever is still pending at the end of the input. The truthiness
  // checks on curPhrase/curTerm deliberately skip an empty unclosed phrase.
  if (curKey != null) {
    if (curPhrase) {
      // keyword with unfinished phrase value
      result.push({ type: 'keyword', key: curKey, phrase: curPhrase })
    } else if (curTerm) {
      // keyword with normal term value
      result.push({ type: 'keyword', key: curKey, value: curTerm })
    } else {
      // keyword with no value, use as normal term
      result.push({ type: 'term', value: curKey })
    }
  } else if (curPhrase) {
    // unclosed phrase: same shape as a closed phrase token
    result.push({ type: 'phrase', phrase: curPhrase })
  } else if (curTerm) {
    // trailing term
    result.push({ type: 'term', value: curTerm })
  }
  return result
}
// Demo: print the parsed tokens for a handful of representative queries,
// covering plain terms, phrases, keywords, and malformed input.
for (const sample of [
  'one two',
  '"a phrase" term',
  '"a phrase" term "broken phrase',
  'term by:me "a phrase"',
  'by:"keyword phrase" three',
  'one by: me two :me by:"keyword phrase" three',
  'quote"in the middle',
]) {
  console.log(parse(sample))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment