Created
April 2, 2010 23:12
-
-
Save datagraph/353854 to your computer and use it in GitHub Desktop.
A parser combinator SPARQL grammar for Scala (work-in-progress).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * A parser combinator SPARQL grammar for Scala.
 * Written in November 2009 by Arto Bendiken <http://ar.to/>
 *
 * This is free and unencumbered software released into the public domain.
 * For more information, please refer to <http://unlicense.org/>
 */
package org.datagraph.sparql | |
import java.io.FileReader | |
import scala.util.parsing.combinator._ | |
import scala.collection.mutable.ListBuffer | |
/**
 * A parser-combinator grammar for SPARQL 1.0 queries (work in progress).
 *
 * Every public `def` corresponds to one production of the W3C grammar; the
 * bracketed number in the comment above it is the production number. Parse
 * results are loosely typed, s-expression-like values (`List`s headed by a
 * `Symbol`, tuples, `Option`s and the combinator library's `~` pairs).
 *
 * Terminals (IRI references, names, variables, numbers, strings) are each
 * matched by a single regular expression. `RegexParsers` skips whitespace
 * before every literal/regex parser, so a terminal assembled from several
 * parsers would wrongly accept whitespace in the middle of a token.
 *
 * Known limitations: string escapes are validated but not decoded,
 * supplementary-plane characters are not accepted in names, `INTEGER` is
 * limited to the `Int` range, and CONSTRUCT/DESCRIBE/ASK do not yet return
 * their components.
 *
 * @see http://www.w3.org/TR/rdf-sparql-query/#grammar
 */
object Grammar extends RegexParsers {

  // --- Character classes and token patterns shared by the terminals --------

  // PN_CHARS_BASE without the supplementary-plane range.
  private val PnCharsBaseClass =
    "A-Za-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF" +
      "\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD"
  private val PnCharsUClass = PnCharsBaseClass + "_"
  private val VarTailClass = PnCharsUClass + "0-9\\u00B7\\u0300-\\u036F\\u203F-\\u2040"
  private val PnCharsClass = PnCharsUClass + "\\-0-9\\u00B7\\u0300-\\u036F\\u203F-\\u2040"
  // Optional remainder of a name: dots are allowed inside but not at the end.
  private val NameTail = "([" + PnCharsClass + ".]*[" + PnCharsClass + "])?"
  private val PnPrefixPattern = "[" + PnCharsBaseClass + "]" + NameTail
  private val PnLocalPattern = "[" + PnCharsUClass + "0-9]" + NameTail
  private val VarNamePattern = "[" + PnCharsUClass + "0-9][" + VarTailClass + "]*"
  // \x22 is the double quote; spelled numerically to keep the string literal simple.
  private val EcharPattern = """\\[tbnrf\\\x22']"""
  private val DecimalPattern = """([0-9]+\.[0-9]*|\.[0-9]+)"""
  private val DoublePattern = """([0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][+-]?[0-9]+"""

  /**
   * Matches a SPARQL keyword case-insensitively, as the specification
   * requires, and yields the canonical spelling passed in `name`. The word
   * boundary prevents matching a prefix of a longer word.
   */
  private def keyword(name: String): Parser[String] =
    regex(("(?i)" + name + "\\b").r) ^^^ name

  /** Built-in call with one argument; yields `List(Symbol(name), argument)`. */
  private def unaryCall(name: String, arg: => Parser[Any]): Parser[Any] =
    keyword(name) ~> "(" ~> arg <~ ")" ^^ (a => List(Symbol(name), a))

  /** Built-in call with two expression arguments; yields `List(Symbol(name), a, b)`. */
  private def binaryCall(name: String): Parser[Any] =
    keyword(name) ~> "(" ~> Expression ~ ("," ~> Expression) <~ ")" ^^ {
      case a ~ b => List(Symbol(name), a, b)
    }

  /** Parses each file named in `args` as a query and prints the parse result. */
  def main(args: Array[String]): Unit = {
    for (arg <- args) {
      val reader = new FileReader(arg)
      // Close the file even if parsing throws.
      try println(parseAll(Query, reader))
      finally reader.close()
    }
  }

  // [1] Prologue elements followed by the elements of the query form.
  def Query: Parser[List[Any]] =
    Prologue ~ (SelectQuery | ConstructQuery | DescribeQuery | AskQuery) ^^ {
      case prologue ~ query => prologue ++ query
    }

  // [2]
  def Prologue: Parser[List[Any]] =
    opt(BaseDecl) ~ rep(PrefixDecl) ^^ {
      case None ~ Nil => Nil
      case base ~ Nil => List(base)
      case None ~ prefixes => List(prefixes)
      case base ~ prefixes => List(base, prefixes)
    }

  // [3]
  def BaseDecl: Parser[List[Any]] =
    keyword("BASE") ~> IRI_REF ^^ (iri => List(Symbol("BASE"), iri))

  // [4]
  def PrefixDecl: Parser[List[Any]] =
    keyword("PREFIX") ~> PNAME_NS ~ IRI_REF ^^ {
      case ns ~ iri => List(Symbol("PREFIX"), ns, iri)
    }

  // [5] The short result shapes are used when no modifier/dataset is present.
  def SelectQuery: Parser[List[Any]] =
    keyword("SELECT") ~> opt(keyword("DISTINCT") | keyword("REDUCED")) ~ ("*" | rep1(Var)) ~
      rep(DatasetClause) ~ WhereClause ~ SolutionModifier ^^ {
      case None ~ vs ~ Nil ~ w ~ None => List(Symbol("SELECT"), vs, w)
      case None ~ vs ~ Nil ~ w ~ s => List(Symbol("SELECT"), vs, w, s)
      case dr ~ vs ~ d ~ w ~ s => List(Symbol("SELECT"), dr, vs, d, w, s)
    }

  // [6]
  def ConstructQuery: Parser[List[Any]] =
    keyword("CONSTRUCT") ~ ConstructTemplate ~ rep(DatasetClause) ~ WhereClause ~
      SolutionModifier ^^^ List(Symbol("CONSTRUCT")) // TODO: return the components

  // [7]
  def DescribeQuery: Parser[List[Any]] =
    keyword("DESCRIBE") ~ ("*" | rep1(VarOrIRIref)) ~ rep(DatasetClause) ~ opt(WhereClause) ~
      SolutionModifier ^^^ List(Symbol("DESCRIBE")) // TODO: return the components

  // [8]
  def AskQuery: Parser[List[Any]] =
    keyword("ASK") ~ rep(DatasetClause) ~ WhereClause ^^^ List(Symbol("ASK")) // TODO

  // [9] NAMED is tried first so it is never considered as the start of an IRI.
  def DatasetClause: Parser[List[Any]] =
    keyword("FROM") ~> (NamedGraphClause | DefaultGraphClause) ^^ (List(Symbol("FROM"), _))

  // [10]
  def DefaultGraphClause: Parser[Any] =
    SourceSelector

  // [11]
  def NamedGraphClause: Parser[List[Any]] =
    keyword("NAMED") ~> SourceSelector ^^ (List(Symbol("NAMED"), _))

  // [12]
  def SourceSelector: Parser[Any] =
    IRIref

  // [13]
  def WhereClause: Parser[List[Any]] =
    opt(keyword("WHERE")) ~> GroupGraphPattern ^^ (List(Symbol("WHERE"), _))

  // [14] Yields None when neither ORDER BY nor LIMIT/OFFSET is present.
  def SolutionModifier: Parser[Any] =
    opt(OrderClause) ~ opt(LimitOffsetClauses) ^^ {
      case None ~ None => None
      case modifiers => modifiers
    }

  // [15]
  def LimitOffsetClauses: Parser[Any] =
    (LimitClause ~ opt(OffsetClause)) | (OffsetClause ~ opt(LimitClause))

  // [16]
  def OrderClause: Parser[List[Any]] =
    keyword("ORDER") ~> keyword("BY") ~> rep1(OrderCondition) ^^ (List(Symbol("ORDERBY"), _))

  // [17]
  def OrderCondition: Parser[Any] =
    ((keyword("ASC") | keyword("DESC")) ~ BrackettedExpression) | (Constraint | Var)

  // [18]
  def LimitClause: Parser[List[Any]] =
    keyword("LIMIT") ~> INTEGER ^^ (List(Symbol("LIMIT"), _))

  // [19]
  def OffsetClause: Parser[List[Any]] =
    keyword("OFFSET") ~> INTEGER ^^ (List(Symbol("OFFSET"), _))

  // [20] A group consisting of a single triples block yields that block;
  // otherwise the result is the flat list of its patterns in source order.
  // Triples that follow a FILTER/OPTIONAL/GRAPH/group are kept (they were
  // previously parsed and then dropped because of operator precedence).
  def GroupGraphPattern: Parser[List[Any]] =
    "{" ~> opt(TriplesBlock) ~
      rep((GraphPatternNotTriples | Filter) ~ (opt(".") ~> opt(TriplesBlock))) <~ "}" ^^ {
      case Some(bgp) ~ Nil => bgp
      case leading ~ rest =>
        leading.toList ++ rest.flatMap { case pattern ~ trailing => pattern :: trailing.toList }
    }

  // [21] `TriplesSameSubject ('.' TriplesBlock?)?` is equivalent to a
  // dot-separated, optionally dot-terminated list; all triples are collected
  // into one flat list headed by 'BGP.
  def TriplesBlock: Parser[List[Any]] =
    rep1sep(TriplesSameSubject, ".") <~ opt(".") ^^ { groups => Symbol("BGP") :: groups.flatten }

  // [22]
  def GraphPatternNotTriples: Parser[Any] =
    OptionalGraphPattern | GroupOrUnionGraphPattern | GraphGraphPattern

  // [23]
  def OptionalGraphPattern: Parser[List[Any]] =
    keyword("OPTIONAL") ~> GroupGraphPattern ^^ (List(Symbol("OPTIONAL"), _))

  // [24]
  def GraphGraphPattern: Parser[List[Any]] =
    keyword("GRAPH") ~> VarOrIRIref ~ GroupGraphPattern ^^ {
      case varOrIRI ~ pattern => List(Symbol("GRAPH"), varOrIRI, pattern)
    }

  // [25]
  def GroupOrUnionGraphPattern: Parser[Any] =
    GroupGraphPattern ~ rep(keyword("UNION") ~ GroupGraphPattern)

  // [26]
  def Filter: Parser[List[Any]] =
    keyword("FILTER") ~> Constraint ^^ (List(Symbol("FILTER"), _))

  // [27]
  def Constraint: Parser[Any] =
    BrackettedExpression | BuiltInCall | FunctionCall

  // [28]
  def FunctionCall: Parser[Any] =
    IRIref ~ ArgList

  // [29]
  def ArgList: Parser[Any] =
    NIL | ("(" ~> repsep(Expression, ",") <~ ")")

  // [30]
  def ConstructTemplate: Parser[Any] =
    "{" ~> opt(ConstructTriples) <~ "}"

  // [31]
  def ConstructTriples: Parser[Any] =
    TriplesSameSubject ~ opt("." ~> opt(ConstructTriples))

  // [32]
  def TriplesSameSubject: Parser[List[Any]] =
    TriplesSameSubject1 | TriplesSameSubject2

  // Expands `s p1 o1, o2; p2 o3` into one (subject, predicate, object) triple
  // per object.
  def TriplesSameSubject1: Parser[List[(Any, Any, Any)]] =
    VarOrTerm ~ PropertyListNotEmpty ^^ {
      case subject ~ properties =>
        for ((predicate, objects) <- properties; obj <- objects)
          yield (subject, predicate, obj)
    }

  def TriplesSameSubject2: Parser[List[Any]] =
    TriplesNode ~ PropertyList ^^ {
      case node ~ properties => List(node, properties) // FIXME
    }

  // [33] At least one verb/object-list pair; separators may be repeated and
  // may trail, as the grammar's `( ';' ( Verb ObjectList )? )*` allows.
  def PropertyListNotEmpty: Parser[List[(Any, List[Any])]] =
    rep1sep(Verb ~ ObjectList ^^ { case p ~ objects => (p, objects) }, rep1(";")) <~ rep(";")

  // [34]
  def PropertyList: Parser[Any] =
    opt(PropertyListNotEmpty)

  // [35] At least one object.
  def ObjectList: Parser[List[Any]] =
    rep1sep(Object, ",")

  // [36]
  def Object: Parser[Any] =
    GraphNode

  // [37]
  def Verb: Parser[Any] =
    VarOrIRIref | "a" // TODO: "a" => rdf:type

  // [38]
  def TriplesNode: Parser[Any] =
    Collection | BlankNodePropertyList

  // [39]
  def BlankNodePropertyList: Parser[Any] =
    "[" ~> PropertyListNotEmpty <~ "]"

  // [40]
  def Collection: Parser[Any] =
    "(" ~> rep1(GraphNode) <~ ")"

  // [41]
  def GraphNode: Parser[Any] =
    VarOrTerm | TriplesNode

  // [42]
  def VarOrTerm: Parser[Any] =
    Var | GraphTerm

  // [43]
  def VarOrIRIref: Parser[Any] =
    Var | IRIref

  // [44] Both `?x` and `$x` yield ('VAR, 'x). The alternation is parenthesised
  // because `^^` binds tighter than `|`.
  def Var: Parser[Any] =
    (VAR1 | VAR2) ^^ (name => (Symbol("VAR"), name))

  // [45]
  def GraphTerm: Parser[Any] =
    IRIref | RDFLiteral | NumericLiteral | BooleanLiteral | BlankNode | NIL

  // [46]
  def Expression: Parser[Any] =
    ConditionalOrExpression

  // [47]
  def ConditionalOrExpression: Parser[Any] =
    ConditionalAndExpression ~ rep("||" ~ ConditionalAndExpression)

  // [48]
  def ConditionalAndExpression: Parser[Any] =
    ValueLogical ~ rep("&&" ~ ValueLogical)

  // [49]
  def ValueLogical: Parser[Any] =
    RelationalExpression

  // [50] Two-character operators are listed before their one-character
  // prefixes so the first matching alternative is the intended one.
  def RelationalExpression: Parser[Any] =
    NumericExpression ~
      opt(("<=" ~ NumericExpression) |
        (">=" ~ NumericExpression) |
        ("!=" ~ NumericExpression) |
        ("=" ~ NumericExpression) |
        ("<" ~ NumericExpression) |
        (">" ~ NumericExpression))

  // [51]
  def NumericExpression: Parser[Any] =
    AdditiveExpression

  // [52] Signed literals are accepted as operands so that `?x +1` parses.
  def AdditiveExpression: Parser[Any] =
    MultiplicativeExpression ~
      rep(("+" ~ MultiplicativeExpression) |
        ("-" ~ MultiplicativeExpression) |
        NumericLiteralPositive |
        NumericLiteralNegative)

  // [53]
  def MultiplicativeExpression: Parser[Any] =
    UnaryExpression ~ rep(("*" ~ UnaryExpression) | ("/" ~ UnaryExpression))

  // [54]
  def UnaryExpression: Parser[Any] =
    ("!" ~ PrimaryExpression) |
      ("+" ~ PrimaryExpression) |
      ("-" ~ PrimaryExpression) |
      PrimaryExpression

  // [55]
  def PrimaryExpression: Parser[Any] =
    BrackettedExpression | BuiltInCall | IRIrefOrFunction | RDFLiteral | NumericLiteral |
      BooleanLiteral | Var

  // [56]
  def BrackettedExpression: Parser[Any] =
    "(" ~> Expression <~ ")"

  // [57] Each call yields a list headed by the function's Symbol followed by
  // all of its arguments, so different built-ins stay distinguishable.
  def BuiltInCall: Parser[Any] =
    unaryCall("STR", Expression) |
      unaryCall("LANG", Expression) |
      binaryCall("LANGMATCHES") |
      unaryCall("DATATYPE", Expression) |
      unaryCall("BOUND", Var) |
      binaryCall("sameTerm") |
      unaryCall("isIRI", Expression) |
      unaryCall("isURI", Expression) |
      unaryCall("isBLANK", Expression) |
      unaryCall("isLITERAL", Expression) |
      RegexExpression

  // [58] Yields List('REGEX, text, pattern) plus the flags when given.
  def RegexExpression: Parser[Any] =
    keyword("REGEX") ~> "(" ~> Expression ~ ("," ~> Expression) ~ opt("," ~> Expression) <~
      ")" ^^ {
      case text ~ pattern ~ flags => List(Symbol("REGEX"), text, pattern) ++ flags.toList
    }

  // [59]
  def IRIrefOrFunction: Parser[Any] =
    IRIref ~ opt(ArgList)

  // [60]
  def RDFLiteral: Parser[Any] =
    String ~ opt(LANGTAG | ("^^" ~> IRIref))

  // [61]
  def NumericLiteral: Parser[Any] =
    NumericLiteralUnsigned | NumericLiteralPositive | NumericLiteralNegative

  // [62] Longest form first: alternation commits to the first success, so
  // INTEGER must not get the chance to match only the "1" of "1.5".
  def NumericLiteralUnsigned: Parser[Any] =
    DOUBLE | DECIMAL | INTEGER

  // [63]
  def NumericLiteralPositive: Parser[Any] =
    DOUBLE_POSITIVE | DECIMAL_POSITIVE | INTEGER_POSITIVE

  // [64]
  def NumericLiteralNegative: Parser[Any] =
    DOUBLE_NEGATIVE | DECIMAL_NEGATIVE | INTEGER_NEGATIVE

  // [65]
  def BooleanLiteral: Parser[Any] =
    "true" | "false"

  // [66] Triple-quoted forms first, otherwise `'''x'''` would be read as the
  // empty short string `''`.
  def String: Parser[Any] =
    STRING_LITERAL_LONG1 | STRING_LITERAL_LONG2 | STRING_LITERAL1 | STRING_LITERAL2

  // [67]
  def IRIref: Parser[Any] =
    IRI_REF | PrefixedName

  // [68]
  def PrefixedName: Parser[Any] =
    PNAME_LN | PNAME_NS

  // [69]
  def BlankNode: Parser[Any] =
    BLANK_NODE_LABEL | ANON

  // [70] Yields the IRI text without the angle brackets. Control characters
  // and space (#x00-#x20) are excluded, as in the specification.
  def IRI_REF: Parser[Any] =
    """<[^<>"{}|^`\\\x00-\x20]*>""".r ^^ (s => s.substring(1, s.length - 1))

  // [71] Yields Some(prefix), or None for the empty prefix `:`.
  def PNAME_NS: Parser[Any] =
    ("(" + PnPrefixPattern + ")?:").r ^^ { s =>
      val prefix = s.substring(0, s.length - 1)
      if (prefix.isEmpty) None else Some(prefix)
    }

  // [72] Yields (prefix option, local name). A prefix cannot contain ':', so
  // the first colon is the separator.
  def PNAME_LN: Parser[Any] =
    ("(" + PnPrefixPattern + ")?:" + PnLocalPattern).r ^^ { s =>
      val colon = s.indexOf(':')
      val prefix = s.substring(0, colon)
      (if (prefix.isEmpty) None else Some(prefix), s.substring(colon + 1))
    }

  // [73] Yields the label without the leading `_:`.
  def BLANK_NODE_LABEL: Parser[Any] =
    ("_:" + PnLocalPattern).r ^^ (_.substring(2))

  // [74]
  def VAR1: Parser[Symbol] =
    ("\\?" + VarNamePattern).r ^^ (s => Symbol(s.substring(1)))

  // [75]
  def VAR2: Parser[Symbol] =
    ("\\$" + VarNamePattern).r ^^ (s => Symbol(s.substring(1)))

  // [76] Yields the tag without the leading `@`.
  def LANGTAG: Parser[Any] =
    "@[a-zA-Z]+(-[a-zA-Z0-9]+)*".r ^^ (_.substring(1))

  // [77] NOTE: `toInt` throws NumberFormatException beyond the Int range.
  def INTEGER: Parser[Int] =
    "[0-9]+".r ^^ (_.toInt)

  // [78] Yields the lexical form.
  def DECIMAL: Parser[Any] =
    DecimalPattern.r

  // [79] Yields the lexical form.
  def DOUBLE: Parser[Any] =
    DoublePattern.r

  // [80]
  def INTEGER_POSITIVE: Parser[Int] =
    """\+[0-9]+""".r ^^ (_.substring(1).toInt)

  // [81] Yields the lexical form without the sign.
  def DECIMAL_POSITIVE: Parser[Any] =
    ("\\+" + DecimalPattern).r ^^ (_.substring(1))

  // [82] Yields the lexical form without the sign.
  def DOUBLE_POSITIVE: Parser[Any] =
    ("\\+" + DoublePattern).r ^^ (_.substring(1))

  // [83]
  def INTEGER_NEGATIVE: Parser[Int] =
    "-[0-9]+".r ^^ (_.toInt)

  // [84] Yields the lexical form including the sign.
  def DECIMAL_NEGATIVE: Parser[Any] =
    ("-" + DecimalPattern).r

  // [85] Yields the lexical form including the sign.
  def DOUBLE_NEGATIVE: Parser[Any] =
    ("-" + DoublePattern).r

  // [86]
  def EXPONENT: Parser[Any] =
    "[eE][+-]?[0-9]+".r

  // [87] Yields the text between the quotes; escapes are left undecoded.
  def STRING_LITERAL1: Parser[String] =
    ("'([^'\\\\\\n\\r]|" + EcharPattern + ")*'").r ^^ (s => s.substring(1, s.length - 1))

  // [88] Yields the text between the quotes; escapes are left undecoded.
  def STRING_LITERAL2: Parser[String] =
    ("\\x22([^\\x22\\\\\\n\\r]|" + EcharPattern + ")*\\x22").r ^^
      (s => s.substring(1, s.length - 1))

  // [89] Yields the text between the delimiters; escapes are left undecoded.
  def STRING_LITERAL_LONG1: Parser[String] =
    ("'''(('|'')?([^'\\\\]|" + EcharPattern + "))*'''").r ^^
      (s => s.substring(3, s.length - 3))

  // [90] Yields the text between the delimiters; escapes are left undecoded.
  def STRING_LITERAL_LONG2: Parser[String] =
    ("\\x22\\x22\\x22((\\x22|\\x22\\x22)?([^\\x22\\\\]|" + EcharPattern + "))*\\x22\\x22\\x22").r ^^
      (s => s.substring(3, s.length - 3))

  // [91] Backslash followed by one of t b n r f \ " '
  def ECHAR: Parser[Any] =
    EcharPattern.r

  // [92] Inner whitespace is consumed by the automatic whitespace skipping.
  def NIL: Parser[Any] =
    "(" ~ rep(WS) ~ ")"

  // [93]
  def WS: Parser[Any] =
    """\s+""".r // FIXME?

  // [94]
  def ANON: Parser[Any] =
    "[" ~ rep(WS) ~ "]"

  // [95] A single character.
  def PN_CHARS_BASE: Parser[Any] =
    ("[" + PnCharsBaseClass + "]").r

  // [96] A single character.
  def PN_CHARS_U: Parser[Any] =
    ("[" + PnCharsUClass + "]").r

  // [97]
  def VARNAME: Parser[Symbol] =
    VarNamePattern.r ^^ (Symbol(_))

  // [98] A single character.
  def PN_CHARS: Parser[Any] =
    ("[" + PnCharsClass + "]").r

  // [99] Matched as one token so the regex engine can backtrack to find the
  // final non-dot character.
  def PN_PREFIX: Parser[Any] =
    PnPrefixPattern.r

  // [100]
  def PN_LOCAL: Parser[Any] =
    PnLocalPattern.r

  // Former placeholder for unfinished terminals; no longer referenced by the
  // grammar but kept so existing callers still compile.
  def TODO: Parser[String] = "TODO".r
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice work! Wish you'd finish this and document its usage.