Last active
August 29, 2015 14:00
-
-
Save mattwarren/11398785 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Globalization; | |
using Irony.Parsing; | |
namespace Grammar
{
    /// <summary>
    /// Irony grammar for the Lucene query syntax: bare terms and quoted phrases,
    /// optional <c>field:</c> qualifiers, required (<c>+</c>) / prohibited (<c>-</c>) terms,
    /// parenthesised sub-queries, field-qualified ranges and AND / OR operators,
    /// with an implied operator inserted between adjacent clauses.
    /// </summary>
    [Language("LuceneGrammar", "1.0", "Lucene Grammar")]
    public class LuceneGrammar : Irony.Parsing.Grammar
    {
        /// <summary>
        /// Builds the terminals, non-terminals, production rules and operator /
        /// punctuation registrations that make up the grammar.
        /// </summary>
        public LuceneGrammar()
            : base(true) // true means case sensitive
        {
            GrammarComments =
                "Implementation of the Lucene Query Syntax\r\n" +
                "See http://lucene.apache.org/core/2_9_4/queryparsersyntax.html \r\n" +
                "and https://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html \r\n" +
                "and http://lucene.apache.org/core/3_4_0/api/all/org/apache/lucene/queryParser/standard/StandardQueryParser.html \r\n" +
                "and http://umbracosearchtools.codeplex.com/SourceControl/changeset/view/15506#48328";

            // ---------------------------------------------------------------
            // Terminals
            // ---------------------------------------------------------------

            // A double-quoted phrase, with no escape processing.
            // NOTE(review): the terminal name "StringLiternal" looks like a typo for
            // "StringLiteral", but it is the name the terminal is created with, so it is
            // left untouched here - confirm nothing matches on it before correcting.
            var quotedPhrase = new StringLiteral("StringLiternal", "\"", StringOptions.NoEscapes);

            // One or more characters, each of which is either
            //   - anything other than  : " + - whitespace ~ [ ] { } ( ) ^   or
            //   - one of the backslash-escaped forms  \^  \{  \[  \~  \"  \:
            var bareText = new RegexBasedTerminal("TextValue", @"([^:""+\-\s~\[\]\{\}\(\)\^]" +
                                                               @"|\\\^|\\{|\\\[|\\~|\\""|\\:)+");

            // Either a word character followed by any run of word chars, digits, ',', '_', '-', '.',
            // or a lone '*'; then optional whitespace and the ':' separator.
            var fieldPrefix = new RegexBasedTerminal("FieldName", @"([\w][\w\d,_\-\.]*|\*)\s*:");

            // Lucene allows the special characters of its query syntax to be escaped. The list is:
            //     + - && || ! ( ) { } [ ] ^ " ~ * ? : \
            // A character is escaped by putting \ in front of it, e.g. to search for (1+1):2 write
            //     \(1\+1\)\:2

            // Lucene falls back to "OR" when no operator is written between two clauses.
            var impliedOr = new ImpliedSymbolTerminal("ImpliedOr");

            // ---------------------------------------------------------------
            // Non-terminals
            // ---------------------------------------------------------------
            var binaryExpression = new NonTerminal("BinaryExpression");
            var binaryOperator = new NonTerminal("BinaryOp");
            var query = new NonTerminal("Query");
            var clause = new NonTerminal("Clause");
            var subClause = new NonTerminal("SubClause");
            var term = new NonTerminal("Term");
            var range = new NonTerminal("Range");
            var rangeOpen = new NonTerminal("OpenRange");
            var rangeClose = new NonTerminal("CloseRange");
            var qualifiedTerm = new NonTerminal("QualifiedTerm");
            var unqualifiedTerm = new NonTerminal("UnqualifiedTerm");
            var required = new NonTerminal("Required");
            var prohibited = new NonTerminal("Prohibited");

            // ---------------------------------------------------------------
            // Rules
            // ---------------------------------------------------------------
            // Queries consist of terms and operators. Terms come in two flavours:
            //   - a Single Term is one word, such as "test" or "hello";
            //   - a Phrase is several words wrapped in double quotes, such as "hello dolly".
            // Boolean operators combine terms into more complex queries.
            //
            // Reference sketch the rules below were derived from:
            //   Query           -> Clause (And Clause | Or Clause | NotClause | Clause)*;
            //   NotClause       -> Not Clause;
            //   Clause          -> (SubClause | Term);
            //   SubClause       -> (PLUS Query) | (MINUS Query) | (OPEN_PAREN Query CLOSE_PAREN);
            //   Term            -> Range | QualifiedTerm | UnqualifiedTerm;
            //   QualifiedTerm   -> FIELD_NAME ( Range | TEXT_VALUE | STRING_LITERAL );
            //   Range           -> OPEN_SQUARE UnqualifiedTerm TO UnqualifiedTerm CLOSE_SQUARE;
            //   UnqualifiedTerm -> (STRING_LITERAL | TEXT_VALUE) Fuzzy? Boost? ;
            Root = query;

            // Every clause, including the first one, is introduced by a BinaryOp.
            // (The original author also tried "Clause + MakePlusRule(Query, BinaryExpression)".)
            query.Rule = MakeStarRule(query, binaryExpression);
            binaryExpression.Rule = binaryOperator + clause;
            binaryOperator.Rule = impliedOr | "AND" | "&&" | "OR" | "||";

            clause.Rule = subClause | term;
            required.Rule = ToTerm("+", "Plus") + term;
            prohibited.Rule = ToTerm("-", "Minus") + term;
            subClause.Rule = required | prohibited | ("(" + query + ")") | (fieldPrefix + "(" + query + ")");

            // A bare Range is not accepted as a Term; ranges only occur after a field name.
            term.Rule = qualifiedTerm | unqualifiedTerm;
            qualifiedTerm.Rule = fieldPrefix + (range | quotedPhrase | bareText);

            // TODO: the optional Fuzzy and Boost suffixes are not part of this rule yet.
            unqualifiedTerm.Rule = quotedPhrase | bareText;

            rangeOpen.Rule = ToTerm("[") | "{";
            rangeClose.Rule = ToTerm("]") | "}";
            range.Rule = rangeOpen + unqualifiedTerm + "TO" + unqualifiedTerm + rangeClose;

            // ---------------------------------------------------------------
            // Parse-tree shaping, precedence and error reporting
            // ---------------------------------------------------------------
            // Per the original author's notes these states are still passed through but are
            // (at least sometimes) not reported in the parse tree. SubClause and QualifiedTerm
            // are intentionally not listed; the author reports that QualifiedTerm is not valid
            // here (see the parser language errors).
            MarkTransient(clause, term, binaryOperator, unqualifiedTerm);
            MarkPunctuation("(", ")");

            RegisterOperators(10, "OR", "||");
            RegisterOperators(20, "AND", "&&");
            // NOTE(review): ImpliedOr is given precedence 20 (the AND level), not 10 (the OR
            // level) - confirm this is intended.
            RegisterOperators(20, impliedOr);

            // Registering brace pairs improves error reporting.
            RegisterBracePair("(", ")");
            // Do not call RegisterBracePair() for the Range braces ('[' and '{'): they do not always balance.

            // ImpliedOr is not a real symbol, so never report it as an expected symbol.
            AddToNoReportGroup(impliedOr);
            // Likewise, do not report the parentheses as expected.
            AddToNoReportGroup("(", ")");

            MarkReservedWords("AND", "OR", "NOT", "TO", "&&", "||", "+", "-");
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment