Skip to content

Instantly share code, notes, and snippets.

@thomsbg
Last active August 18, 2017 20:08
Show Gist options
  • Save thomsbg/dad4ca8484fbc286ef05c44191e7968f to your computer and use it in GitHub Desktop.
Save thomsbg/dad4ca8484fbc286ef05c44191e7968f to your computer and use it in GitHub Desktop.
hand-rolled query parser
/**
 * Parse a search query string into a flat list of tokens.
 *
 * Token shapes:
 *   { type: 'term', value }            - a bare word
 *   { type: 'phrase', phrase }         - text inside double quotes
 *   { type: 'keyword', key, value }    - `key:word`
 *   { type: 'keyword', key, phrase }   - `key:"some words"`
 *
 * Rules:
 *   - Whitespace (anything matched by `\s`) separates bare words.
 *   - A `"` opens a phrase; everything up to the next `"` (or end of input)
 *     is taken literally, including `:` and whitespace.
 *   - A `:` directly after a word turns that word into a keyword key. Later
 *     `:` characters in the same word belong to the value (`url:http://x`).
 *   - A key with no value (`by: me`) is emitted as a plain term.
 *   - A `:` with no word before it is ignored.
 *   - An unclosed, non-empty phrase at end of input is emitted like a closed
 *     one; an unclosed empty phrase is dropped.
 *
 * @param {string} query - raw query text; `null`/`undefined` yields `[]`
 * @returns {Array<Object>} tokens in the order they appear in the query
 */
function parse(query) {
  if (query == null) return []

  const whitespace = /\s/
  const result = []
  let curTerm = null // bare word being accumulated
  let curPhrase = null // quoted text; non-null exactly while a quote is open
  let curKey = null // keyword key waiting for its value

  // Emit the pending bare key/term (if any) and reset both.
  const flushWord = () => {
    if (curKey != null) {
      if (curTerm != null) {
        result.push({ type: 'keyword', key: curKey, value: curTerm })
      } else {
        // with no value, treat the key as a normal term
        result.push({ type: 'term', value: curKey })
      }
    } else if (curTerm != null) {
      result.push({ type: 'term', value: curTerm })
    }
    curTerm = null
    curKey = null
  }

  // Emit the open phrase, as a keyword value when a key is pending.
  const closePhrase = () => {
    if (curKey != null) {
      result.push({ type: 'keyword', key: curKey, phrase: curPhrase })
      curKey = null
    } else {
      result.push({ type: 'phrase', phrase: curPhrase })
    }
    curPhrase = null
  }

  for (const char of query) {
    if (curPhrase != null) {
      // inside quotes only the closing quote is special
      if (char === '"') {
        closePhrase()
      } else {
        curPhrase += char
      }
      continue
    }

    if (char === '"') {
      // A quote in the middle of a word ends that word first. If the word
      // was a keyword value (`by:me"x"`), the key is consumed with it so it
      // cannot leak onto the phrase that follows.
      if (curTerm != null) {
        flushWord()
      }
      curPhrase = ''
    } else if (char === ':') {
      if (curTerm == null) {
        // a `:` preceded by whitespace (or another `:`) is meaningless
      } else if (curKey != null) {
        // key already captured: this `:` is part of the value
        curTerm += char
      } else {
        curKey = curTerm
        curTerm = null
      }
    } else if (whitespace.test(char)) {
      flushWord()
    } else {
      curTerm = curTerm == null ? char : curTerm + char
    }
  }

  // End of input: flush whatever is still pending.
  if (curPhrase != null && curPhrase !== '') {
    closePhrase()
  } else {
    flushWord()
  }
  return result
}
// Sample queries covering plain terms, phrases, keywords, and malformed input;
// each one is parsed and its token list printed, in this order.
const sampleQueries = [
  'one two',
  '"a phrase" term',
  '"a phrase" term "broken phrase',
  'term by:me "a phrase"',
  'by:"keyword phrase" three',
  'one by: me two :me by:"keyword phrase" three',
  'quote"in the middle',
]
for (const sample of sampleQueries) {
  console.log(parse(sample))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment