Created
April 13, 2012 09:59
-
-
Save honnix/2375506 to your computer and use it in GitHub Desktop.
A simple XPath-like parser written with Scala parser combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Example query: _[@type=="baoc" and ts10[@value=="1" and a==2] and ts20]
package com.honnix.xml.transformer | |
import scala.xml.NodeSeq | |
import scala.util.parsing.combinator.JavaTokenParsers | |
/** Factory for [[XPathParser]] so callers can write `XPathParser(path, selector)` without `new`. */
object XPathParser {

  /**
   * Builds a parser for one conditional path.
   *
   * @param conditionalPath the filter expression to parse
   * @param topSelector     function used to pick candidate nodes by name from a node sequence
   * @return a new parser bound to the given path and selector
   */
  def apply(conditionalPath: String, topSelector: (NodeSeq, String) => NodeSeq): XPathParser = {
    val parser = new XPathParser(conditionalPath, topSelector)
    parser
  }
}
/**
 * Parser for a small XPath-like filter language, e.g.
 * `_[@type=="baoc" and ts10[@value=="1" and a==2] and ts20]`.
 *
 * Grammar implemented by the parsers below:
 * {{{
 * expr            ::= ident "[" term "]"
 * term            ::= subTerm { "or" subTerm }
 * subTerm         ::= conditionalNode { "and" conditionalNode }
 * conditionalNode ::= expr | selector | condition | "(" term ")"
 * selector        ::= ident "\" ( selector | condition )
 * condition       ::= ident [ op right ]
 * op              ::= "==" | "!=" | ">=" | "<=" | ">" | "<"
 * right           ::= floatingPointNumber | stringLiteral
 * ident           ::= [ "@" ] ( letter | "_" ) { word character }
 * }}}
 *
 * @param conditionalPath the expression to parse
 * @param topSelector     how an `expr` picks its candidate nodes by name
 *                        (for example `_ \ _` or `_ \\ _`)
 */
class XPathParser(conditionalPath: String, topSelector: (NodeSeq, String) => NodeSeq) extends JavaTokenParsers {

  /** `ident[term]`: select nodes through `topSelector`, then keep those satisfying `term`. */
  def expr: Parser[NodeSeq => NodeSeq] = ident ~ ("[" ~> term <~ "]") ^^ {
    case name ~ predicate => (nodes: NodeSeq) => topSelector(nodes, name).filter(predicate)
  }

  /** Disjunction of sub-terms separated by `or`. */
  def term: Parser[NodeSeq => Boolean] = subTerm ~ rep("or" ~> subTerm) ^^ {
    case first ~ rest => (x: NodeSeq) => first(x) || rest.exists(_(x))
  }

  /** Conjunction of conditional nodes separated by `and`. */
  def subTerm: Parser[NodeSeq => Boolean] = conditionalNode ~ rep("and" ~> conditionalNode) ^^ {
    case first ~ rest => (x: NodeSeq) => first(x) && rest.forall(_(x))
  }

  /**
   * A single predicate. A nested `expr` or `selector` is true when it selects at least one node.
   * The order of alternatives matters: `expr` and `selector` both start with an identifier and
   * must be tried before the plain `condition`.
   */
  def conditionalNode: Parser[NodeSeq => Boolean] = (
      expr ^^ (select => (x: NodeSeq) => !select(x).isEmpty)
    | selector ^^ (select => (x: NodeSeq) => !select(x).isEmpty)
    | condition
    | "(" ~> term <~ ")"
  )

  /** `ident \ rest`: the children named `ident` for which the nested selector/condition holds. */
  def selector: Parser[NodeSeq => NodeSeq] = ident ~ "\\" ~ (
      selector ^^ (select => (x: NodeSeq) => !select(x).isEmpty)
    | condition
  ) ^^ {
    case name ~ _ ~ childTest => (x: NodeSeq) => (x \ name).filter(childTest)
  }

  /**
   * `ident` alone tests that the child/attribute exists; `ident op literal` compares its text.
   * A string literal is compared as a string; a numeric literal is compared as a `Float`, and
   * the comparison is false when the node text is empty or is not a number.
   */
  def condition: Parser[NodeSeq => Boolean] = ident ~ opt(op ~ right) ^^ {
    case field ~ Some(operator ~ (literal: String)) =>
      val compare = stringComparison(operator)
      (x: NodeSeq) => compare((x \ field).text, literal)
    case field ~ Some(operator ~ (literal: Float)) =>
      val compare = floatComparison(operator)
      (x: NodeSeq) => parseFloat((x \ field).text).exists(compare(_, literal))
    case field ~ None =>
      (x: NodeSeq) => !(x \ field).isEmpty
  }

  /**
   * Comparison operator. Only the six supported operators are accepted, so that something like
   * `=` or `<>` is reported as a parse failure rather than escaping as a `MatchError`.
   * Two-character operators are listed first so `<=` is not read as `<`.
   */
  def op: Parser[String] = """[=!<>]=|[<>]""".r

  /** Right-hand literal: a number (as `Float`) or a double-quoted string with the quotes removed. */
  def right: Parser[Any] = (
      floatingPointNumber ^^ (_.toFloat)
    | stringLiteral ^^ (quoted => quoted.substring(1, quoted.length - 1))
  )

  /** Identifier, optionally prefixed with `@` to address an attribute. */
  override def ident: Parser[String] = """@?[a-zA-Z_]\w*""".r

  /**
   * Parses `conditionalPath` and applies the resulting filter to `nodeSeq`.
   *
   * @param nodeSeq the nodes to filter
   * @return the selected nodes, or `null` when `conditionalPath` cannot be parsed
   *         (kept for compatibility with existing callers)
   */
  def parse(nodeSeq: NodeSeq): NodeSeq =
    parseAll(expr, conditionalPath) match {
      case Success(filter, _) => filter(nodeSeq)
      case Failure(_, _) => null
      case Error(_, _) => null
    }

  /** Lexicographic string comparison for the given operator. */
  private def stringComparison(operator: String): (String, String) => Boolean = operator match {
    case "==" => _ == _
    case "!=" => _ != _
    case ">" => _ > _
    case "<" => _ < _
    case ">=" => _ >= _
    case "<=" => _ <= _
  }

  /** Numeric comparison for the given operator. */
  private def floatComparison(operator: String): (Float, Float) => Boolean = operator match {
    case "==" => _ == _
    case "!=" => _ != _
    case ">" => _ > _
    case "<" => _ < _
    case ">=" => _ >= _
    case "<=" => _ <= _
  }

  /** Parses node text as a `Float`; `None` for empty or non-numeric text. */
  private def parseFloat(text: String): Option[Float] =
    try Some(text.toFloat)
    catch { case _: NumberFormatException => None }
}
/**
 * Helpers shared by [[XmlTransformer]] implementations: DOM <-> scala.xml conversions and
 * extension methods for copying and filtering nodes.
 */
object XmlTransformer {

  /** Copies every node of the sequence with [[copyNode]]. */
  private def copyNodeSeq(nodeSeq: NodeSeq): NodeSeq =
    nodeSeq.map(copyNode)

  /**
   * Recursively copies an element tree, creating new `Elem`, `Text` and `PCData` instances.
   * Any other kind of node is returned as is instead of being replaced by `null`, which would
   * otherwise end up as a `null` child in the copied tree.
   */
  private def copyNode(node: Node): Node = node match {
    // `copy` keeps prefix, label, attributes and scope and only swaps the children.
    case e: Elem => e.copy(child = e.child.map(copyNode))
    case d: PCData => PCData(d.text)
    case t: Text => Text(t.text)
    case other => other
  }

  /** Converts a DOM document to a scala.xml element by serialising and re-parsing it. */
  implicit def domToElem(dom: Document): Elem = {
    val charWriter = new CharArrayWriter
    TransformerFactory.newInstance.newTransformer.transform(new DOMSource(dom),
      new StreamResult(charWriter))
    XML.load(new CharArrayReader(charWriter.toCharArray))
  }

  /** Converts a scala.xml element to a DOM document by serialising it and handing the text to `XMLUtil.getDocument`. */
  implicit def elemToDom(elem: Elem): Document = {
    val writer = new StringWriter
    XML.write(writer, elem, "UTF-8", false, null)
    XMLUtil.getDocument(writer.toString)
  }

  /** Extension methods on a single node. */
  final class NodeOps(node: Node) {
    /** Returns a copy of the node (see `copyNode`). */
    def !(): Node = copyNode(node)
  }

  /** Extension methods on a node sequence. */
  final class NodeSeqOps(nodeSeq: NodeSeq) {
    /** Returns a copy of the sequence, or `NodeSeq.Empty` when it has no nodes. */
    def !(): NodeSeq = if (!nodeSeq.isEmpty) copyNodeSeq(nodeSeq) else NodeSeq.Empty

    /** Filters with `conditionalPath`, selecting candidates among direct children (`\`). */
    def \%(conditionalPath: String): NodeSeq =
      XPathParser(conditionalPath, (nodes: NodeSeq, label: String) => nodes \ label).parse(nodeSeq)

    /** Filters with `conditionalPath`, selecting candidates among all descendants (`\\`). */
    def \\%(conditionalPath: String): NodeSeq =
      XPathParser(conditionalPath, (nodes: NodeSeq, label: String) => nodes \\ label).parse(nodeSeq)
  }

  // Named wrapper classes replace the former anonymous structural types, whose methods
  // were invoked reflectively.
  implicit def nodeWrapper(node: Node): NodeOps = new NodeOps(node)

  implicit def nodeSeqWrapper(nodeSeq: NodeSeq): NodeSeqOps = new NodeSeqOps(nodeSeq)
}
/**
 * Base for transformations written against scala.xml that are exposed as DOM-to-DOM functions.
 */
trait XmlTransformer {

  /** The actual transformation, expressed on scala.xml elements. */
  protected def doTransform(input: Elem): Elem

  /**
   * Converts the DOM document to an `Elem`, runs [[doTransform]] and converts the result back,
   * using the conversions defined in the `XmlTransformer` companion object.
   */
  def transform(input: Document): Document = {
    val sourceElem = XmlTransformer.domToElem(input)
    val resultElem = doTransform(sourceElem)
    XmlTransformer.elemToDom(resultElem)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment