Created
April 29, 2013 15:20
-
-
Save JnBrymn/5482288 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class WimpySwanParser extends BaseParser<SpanQuery> { | |
int MAX_PARAGRAPH_LENGTH = 5000; | |
int MAX_SENTENCE_LENGTH = 500; | |
public Rule Query() { | |
return Sequence(OrExpression(),EOI); | |
} | |
public Rule OrExpression() { | |
return Sequence( | |
SameExpression(), | |
ZeroOrMore( | |
Sequence( | |
OR(), | |
SameExpression(), | |
push(new SpanOrQuery(pop(1), pop())) | |
) | |
) | |
); | |
} | |
public Rule SameExpression() { | |
return Sequence( | |
WithExpression(), | |
ZeroOrMore( | |
Sequence( | |
SAME(), | |
WithExpression(), | |
push(new SpanNearQuery( | |
new SpanQuery[] { pop(1), pop() }, | |
MAX_PARAGRAPH_LENGTH, false) | |
)) | |
) | |
); | |
} | |
public Rule WithExpression() { | |
return Sequence( | |
AdjNearExpression(), | |
ZeroOrMore( | |
Sequence( | |
WITH(), | |
AdjNearExpression(), | |
push(new SpanNearQuery( | |
new SpanQuery[] { pop(1), pop() }, | |
MAX_SENTENCE_LENGTH, false) | |
)) | |
) | |
); | |
} | |
public Rule AdjNearExpression() { | |
return Sequence( | |
Term(), | |
ZeroOrMore(FirstOf( | |
Sequence( | |
NEAR(), | |
Term(), | |
push(new SpanNearQuery( | |
new SpanQuery[] { pop(1), pop() }, | |
1, false) | |
)), | |
Sequence( | |
ADJ(), | |
Term(), | |
push(new SpanNearQuery( | |
new SpanQuery[] { pop(1), pop() }, | |
1, true) | |
)) | |
) | |
) | |
); | |
} | |
public Rule Term() { | |
return Sequence( | |
OneOrMore(Char()), | |
push(new SpanTermQuery(new Term(match()))) | |
); | |
} | |
public Rule Char() { | |
return AnyOf("0123456789" + | |
"abcdefghijklmnopqrstuvwxyz" + | |
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + | |
"-_" | |
); | |
} | |
Rule WhiteSpace() { | |
return OneOrMore(AnyOf(" \t\f")); | |
} | |
////////////////////////////////////////////////// | |
public Rule OR() { | |
return Sequence(IgnoreCase("OR"), WhiteSpace()); | |
} | |
public Rule SAME() { | |
return Sequence(IgnoreCase("SAME"), WhiteSpace()); | |
} | |
public Rule WITH() { | |
return Sequence(IgnoreCase("WITH"), WhiteSpace()); | |
} | |
public Rule NEAR() { | |
return Sequence(IgnoreCase("NEAR"), WhiteSpace()); | |
} | |
public Rule ADJ() { | |
return Sequence(IgnoreCase("ADJ"), WhiteSpace()); | |
} | |
public static void main(String[] args) { | |
WimpySwanParser parser = Parboiled.createParser(WimpySwanParser.class); | |
String input = "apple AND banana NEAR coconut"; | |
ParsingResult<?> result = new RecoveringParseRunner<SpanQuery>(parser.Query()).run(input); | |
SpanQuery node = (SpanQuery) result.parseTreeRoot.getValue(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment