pchiusano · February 11, 2011 15:26
diff --git a/XMLEventParser.scala b/XMLEventParser.scala
 package xmlcombinators

 import scala.util.parsing.combinator.Parsers
 import scala.util.parsing.input.{NoPosition, Reader}
 import javax.xml.stream.events.{Attribute, EndElement, XMLEvent}
 import javax.xml.stream.{XMLEventReader, XMLInputFactory}
 import collection.mutable.ArrayBuffer
 import java.io.File

 /**
  * License: MIT license (http://www.opensource.org/licenses/mit-license.php)
  * Copyright (C) 2011 by Capital IQ
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * in the Software without restriction, including without limitation the rights
  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  * copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  * 
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  *
  * @author Anthony Enache, Paul Chiusano
  */

 /** Adapter to run parser combinators using XMLEvent as the token type. 
  * Currently works by grabbing the full list of events up front from a 
  * javax.xml.stream.XMLEventReader, so this is not suitable for streaming.
  * 
  * Examples: 
  *  startElement("foo") <~ endElement("foo") applied to <foo bar="qux" baz="juju"></foo>
  *  results in the Map[String,String] of attrs for the tag 'foo': Map("bar"->"qux", "baz"->"juju")
  * 
  *  startElement("foo") ~ rep(bar) <~ endElement("foo") map { case attrs ~ bars => ... }
  * 
  * Generally, it's better to use the nest and element combinators, to avoid having to 
  * manually match start and end tags in the grammar.
  */
 trait XMLEventParser extends Parsers {
  /** The token type consumed by every parser in this trait. */
  type Elem = XMLEvent

  /** Implicitly copies a Scala sequence into a new java.util.ArrayList, preserving order. */
  implicit def listToJavaList[T](l: Seq[T]): java.util.ArrayList[T] = {
    val al = new java.util.ArrayList[T](l.size)
    l.foreach(e => al.add(e))
    al
  }

  /** Builds a parser that inspects exactly one event.
   *
   *  `extract` returns Some(result) when the event is acceptable, in which case the
   *  event is consumed, or None to reject it. A null event is always rejected. On
   *  rejection nothing is consumed: the failure is reported at `in`, the position
   *  where the mismatch was observed.
   *
   *  @param expected description of what was expected; the failure message is
   *                  `expected + ", but found " + event`
   */
  private def singleEvent[T](expected: String)(extract: XMLEvent => Option[T]): Parser[T] =
    new Parser[T] {
      def apply(in: Input): ParseResult[T] = {
        val elt = in.first
        val accepted: Option[T] = if (elt eq null) None else extract(elt)
        accepted match {
          case Some(value) => Success(value, in.rest)
          case None        => Failure(expected + ", but found " + elt, in)
        }
      }
    }

  /** We return a map of the attributes defined by the start element tag.
   *  Attribute names are keyed by their local part (namespace prefix dropped).
   */
  def startElement(s: String): Parser[Map[String,String]] =
    singleEvent[Map[String,String]]("Expected start element with label " + s) { elt =>
      if (elt.isStartElement && elt.asStartElement.getName.getLocalPart == s) {
        val as = elt.asStartElement.getAttributes
        var attributes = Map[String, String]()

        while (as.hasNext) {
          // The cast is kept because getAttributes is a raw Iterator on older JDKs.
          val attr = as.next.asInstanceOf[Attribute]
          attributes += ( attr.getName.getLocalPart -> attr.getValue )
        }

        Some(attributes)
      } else {
        None
      }
    }

  /** Parser for the end tag whose local name is `s`; yields the EndElement event. */
  def endElement(s: String) : Parser[EndElement] =
    singleEvent[EndElement]("Expected end element with label " + s) { elt =>
      if (elt.isEndElement && elt.asEndElement.getName.getLocalPart == s) Some(elt.asEndElement)
      else None
    }

  /** Parser for leaf elements of type <tag attr1="" attr2="" .../>.  From the reader, this would generate
   *  a start element and end element event, of which, only the start element is truly interesting as it
   *  carries the attributes with it.
   */
  def element(s: String) : Parser[Map[String, String]] = startElement(s) <~ endElement(s)

  /** Parser for non-leaf elements. The function f combines the attributes of the
   *  start tag with the result of the inner parser.
   */
  def element[A,B](s: String, inner: Parser[A])(f: ((Map[String,String],A)) => B): Parser[B] =
    startElement(s) ~ inner <~ endElement(s) ^^ { case attrs ~ a => f((attrs, a)) }

  /** Parser for leaf elements with attributes. The function f extracts an A from these attrs. */
  def element[A](s: String, f: Map[String,String] => A): Parser[A] =
    startElement(s).map(f) <~ endElement(s)

  /** Parser for elements surrounded by the tag withinTag, whose attributes are ignored. */
  def nest[A](withinTag: String, inner: Parser[A]): Parser[A] =
    element[A,A](withinTag, inner) { case (attrs,a) => a }

  /** Typesafe choice between two parsers: Left for the first, Right for the second. */
  def choice[A,B](p: Parser[A], p2: Parser[B]): Parser[Either[A,B]] =
    p.map(a => Left(a)) | p2.map(b => Right(b))

  /** Parser for text within a tag. Example:
    * nest("foo", textElement) applied to '<foo>thisText</foo>' results in 'thisText'.
    */
  def textElement: Parser[String] =
    singleEvent[String]("Expected text element") { elt =>
      if (elt.isCharacters) Some(elt.asCharacters.getData) else None
    }

  /** Parser that extracts the text from an element <s>text</s> */
  def textOnlyElement(s: String): Parser[String] = nest(s, textElement)

  /** Parser that extracts an optional text-only element: <s>text</s> yields Some(text), <s></s> yields None. */
  def optionalTextOnlyElement(s: String): Parser[Option[String]] = nest(s, opt(textElement))

  /** Parse some prefix of reader `in' with parser `p' */
  def parse[T](p: Parser[T], in: Reader[XMLEvent]): ParseResult[T] =
    p(in)

  /** Parse all of reader `in' with parser `p' */
  def parseAll[T](p: Parser[T], in: Reader[XMLEvent]): ParseResult[T] =
    parse(phrase(p), in)

  /** Extract the mandatory attribute from the attribute map.
   *  Throws IllegalArgumentException (via require) when the attribute is absent.
   */
  def attribute(as: Map[String, String], a: String): String = {
    require(as.contains(a), "Missing mandatory attribute: " + a)
    as(a)
  }

  /** Extract the value of the attribute, if it exists, otherwise use the default */
  def attribute(as: Map[String, String], a: String, default: String): String = as.getOrElse(a, default)
 }

 object EventReader {
  /** Reads every event from `r` eagerly and returns the ones that matter to a grammar.
   *
   *  Start-document and end-document events are dropped, as are character events
   *  that consist solely of whitespace. All other events are kept in document order.
   *
   *  The StAX event reader is released when reading finishes or fails. Per the StAX
   *  contract this does not close `r` itself, which remains the caller's responsibility.
   */
  def readEvents(r: java.io.Reader): Array[XMLEvent] = {
    val result = new ArrayBuffer[XMLEvent]()
    val factory = XMLInputFactory.newInstance()
    val reader : XMLEventReader = factory.createXMLEventReader(r)

    try {
      while (reader.hasNext) {
        val event = reader.nextEvent()

        val isDocumentBoundary = event.isStartDocument || event.isEndDocument
        val isBlankText = event.isCharacters && event.asCharacters.isWhiteSpace

        if ( !isDocumentBoundary && !isBlankText ) {
          result += event
        }
      }
    } finally {
      reader.close()
    }
    result.toArray
  }
 }

 /**
  * Reader over a pre-loaded array of XML events, positioned at `index`.
  *
  * Note that the events produced by EventReader.readEvents exclude whitespace-only
  * text as well as the document start and end events.
  *
  * The reader is at its end only once every event has been consumed. At that point
  * `first` returns a synthetic end-of-document event, so that parsers inspecting it
  * fail cleanly instead of re-reading the last real event or indexing past the array.
  * Because readEvents drops genuine end-of-document events, the sentinel cannot be
  * confused with a real token.
  */
 class EventReader(index: Int, events: Array[XMLEvent] ) extends Reader[XMLEvent] {
  def this(r: java.io.Reader) = this(0, EventReader.readEvents(r))

  // Created lazily: only a reader that is actually queried past the last event pays for it.
  private lazy val endOfInput: XMLEvent =
    javax.xml.stream.XMLEventFactory.newInstance().createEndDocument()

  /** True once all events have been consumed (also true for an empty event array). */
  def atEnd: Boolean = index >= events.length

  /** The event at the current position, or the end-of-document sentinel when at the end. */
  def first: XMLEvent = if ( atEnd ) endOfInput else events(index)

  /** The reader advanced by one event; at the end the reader returns itself. */
  def rest: EventReader = if ( atEnd ) this else new EventReader(index + 1, events)

  /** No position information is tracked for XML events. */
  def pos = NoPosition
 }
	package xmlcombinators

	import scala.util.parsing.combinator.Parsers
	import scala.util.parsing.input.{NoPosition, Reader}
	import javax.xml.stream.events.{Attribute, EndElement, XMLEvent}
	import javax.xml.stream.{XMLEventReader, XMLInputFactory}
	import collection.mutable.ArrayBuffer
	import java.io.File

	/**
	* License: MIT license (http://www.opensource.org/licenses/mit-license.php)
	* Copyright (C) 2011 by Capital IQ
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to deal
	* in the Software without restriction, including without limitation the rights
	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	* copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in
	* all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	* THE SOFTWARE.
	*
	* @author Anthony Enache, Paul Chiusano
	*/

	/** Adapter to run parser combinators using XMLEvent as the token type.
	* Currently works by grabbing the full list of events up front from a
	* javax.xml.stream.XMLEventReader, so this is not suitable for streaming.
	*
	* Examples:
	* startElement("foo") <~ endElement("foo") applied to <foo bar="qux" baz="juju"></foo>
	* results in the Map[String,String] of attrs for the tag 'foo': Map("bar"->"qux", "baz"->"juju")
	*
	* startElement("foo") ~ rep(bar) <~ endElement("foo") map { case attrs ~ bars => ... }
	*
	* Generally, it's better to use the nest and element combinators, to avoid having to
	* manually match start and end tags in the grammar.
	*/
	trait XMLEventParser extends Parsers {
	  /** The token type consumed by every parser in this trait. */
	  type Elem = XMLEvent

	  /** Implicitly copies a Scala sequence into a new java.util.ArrayList, preserving order. */
	  implicit def listToJavaList[T](l: Seq[T]): java.util.ArrayList[T] = {
	    val al = new java.util.ArrayList[T](l.size)
	    l.foreach(e => al.add(e))
	    al
	  }

	  /** Builds a parser that inspects exactly one event.
	   *
	   *  `extract` returns Some(result) when the event is acceptable, in which case the
	   *  event is consumed, or None to reject it. A null event is always rejected. On
	   *  rejection nothing is consumed: the failure is reported at `in`, the position
	   *  where the mismatch was observed.
	   *
	   *  @param expected description of what was expected; the failure message is
	   *                  `expected + ", but found " + event`
	   */
	  private def singleEvent[T](expected: String)(extract: XMLEvent => Option[T]): Parser[T] =
	    new Parser[T] {
	      def apply(in: Input): ParseResult[T] = {
	        val elt = in.first
	        val accepted: Option[T] = if (elt eq null) None else extract(elt)
	        accepted match {
	          case Some(value) => Success(value, in.rest)
	          case None        => Failure(expected + ", but found " + elt, in)
	        }
	      }
	    }

	  /** We return a map of the attributes defined by the start element tag.
	   *  Attribute names are keyed by their local part (namespace prefix dropped).
	   */
	  def startElement(s: String): Parser[Map[String,String]] =
	    singleEvent[Map[String,String]]("Expected start element with label " + s) { elt =>
	      if (elt.isStartElement && elt.asStartElement.getName.getLocalPart == s) {
	        val as = elt.asStartElement.getAttributes
	        var attributes = Map[String, String]()

	        while (as.hasNext) {
	          // The cast is kept because getAttributes is a raw Iterator on older JDKs.
	          val attr = as.next.asInstanceOf[Attribute]
	          attributes += ( attr.getName.getLocalPart -> attr.getValue )
	        }

	        Some(attributes)
	      } else {
	        None
	      }
	    }

	  /** Parser for the end tag whose local name is `s`; yields the EndElement event. */
	  def endElement(s: String) : Parser[EndElement] =
	    singleEvent[EndElement]("Expected end element with label " + s) { elt =>
	      if (elt.isEndElement && elt.asEndElement.getName.getLocalPart == s) Some(elt.asEndElement)
	      else None
	    }

	  /** Parser for leaf elements of type <tag attr1="" attr2="" .../>. From the reader, this would generate
	   *  a start element and end element event, of which, only the start element is truly interesting as it
	   *  carries the attributes with it.
	   */
	  def element(s: String) : Parser[Map[String, String]] = startElement(s) <~ endElement(s)

	  /** Parser for non-leaf elements. The function f combines the attributes of the
	   *  start tag with the result of the inner parser.
	   */
	  def element[A,B](s: String, inner: Parser[A])(f: ((Map[String,String],A)) => B): Parser[B] =
	    startElement(s) ~ inner <~ endElement(s) ^^ { case attrs ~ a => f((attrs, a)) }

	  /** Parser for leaf elements with attributes. The function f extracts an A from these attrs. */
	  def element[A](s: String, f: Map[String,String] => A): Parser[A] =
	    startElement(s).map(f) <~ endElement(s)

	  /** Parser for elements surrounded by the tag withinTag, whose attributes are ignored. */
	  def nest[A](withinTag: String, inner: Parser[A]): Parser[A] =
	    element[A,A](withinTag, inner) { case (attrs,a) => a }

	  /** Typesafe choice between two parsers: Left for the first, Right for the second. */
	  def choice[A,B](p: Parser[A], p2: Parser[B]): Parser[Either[A,B]] =
	    p.map(a => Left(a)) | p2.map(b => Right(b))

	  /** Parser for text within a tag. Example:
	    * nest("foo", textElement) applied to '<foo>thisText</foo>' results in 'thisText'.
	    */
	  def textElement: Parser[String] =
	    singleEvent[String]("Expected text element") { elt =>
	      if (elt.isCharacters) Some(elt.asCharacters.getData) else None
	    }

	  /** Parser that extracts the text from an element <s>text</s> */
	  def textOnlyElement(s: String): Parser[String] = nest(s, textElement)

	  /** Parser that extracts an optional text-only element: <s>text</s> yields Some(text), <s></s> yields None. */
	  def optionalTextOnlyElement(s: String): Parser[Option[String]] = nest(s, opt(textElement))

	  /** Parse some prefix of reader `in' with parser `p' */
	  def parse[T](p: Parser[T], in: Reader[XMLEvent]): ParseResult[T] =
	    p(in)

	  /** Parse all of reader `in' with parser `p' */
	  def parseAll[T](p: Parser[T], in: Reader[XMLEvent]): ParseResult[T] =
	    parse(phrase(p), in)

	  /** Extract the mandatory attribute from the attribute map.
	   *  Throws IllegalArgumentException (via require) when the attribute is absent.
	   */
	  def attribute(as: Map[String, String], a: String): String = {
	    require(as.contains(a), "Missing mandatory attribute: " + a)
	    as(a)
	  }

	  /** Extract the value of the attribute, if it exists, otherwise use the default */
	  def attribute(as: Map[String, String], a: String, default: String): String = as.getOrElse(a, default)
	}

	object EventReader {
	  /** Reads every event from `r` eagerly and returns the ones that matter to a grammar.
	   *
	   *  Start-document and end-document events are dropped, as are character events
	   *  that consist solely of whitespace. All other events are kept in document order.
	   *
	   *  The StAX event reader is released when reading finishes or fails. Per the StAX
	   *  contract this does not close `r` itself, which remains the caller's responsibility.
	   */
	  def readEvents(r: java.io.Reader): Array[XMLEvent] = {
	    val result = new ArrayBuffer[XMLEvent]()
	    val factory = XMLInputFactory.newInstance()
	    val reader : XMLEventReader = factory.createXMLEventReader(r)

	    try {
	      while (reader.hasNext) {
	        val event = reader.nextEvent()

	        val isDocumentBoundary = event.isStartDocument || event.isEndDocument
	        val isBlankText = event.isCharacters && event.asCharacters.isWhiteSpace

	        if ( !isDocumentBoundary && !isBlankText ) {
	          result += event
	        }
	      }
	    } finally {
	      reader.close()
	    }
	    result.toArray
	  }
	}

	/**
	 * Reader over a pre-loaded array of XML events, positioned at `index`.
	 *
	 * Note that the events produced by EventReader.readEvents exclude whitespace-only
	 * text as well as the document start and end events.
	 *
	 * The reader is at its end only once every event has been consumed. At that point
	 * `first` returns a synthetic end-of-document event, so that parsers inspecting it
	 * fail cleanly instead of re-reading the last real event or indexing past the array.
	 * Because readEvents drops genuine end-of-document events, the sentinel cannot be
	 * confused with a real token.
	 */
	class EventReader(index: Int, events: Array[XMLEvent] ) extends Reader[XMLEvent] {
	  def this(r: java.io.Reader) = this(0, EventReader.readEvents(r))

	  // Created lazily: only a reader that is actually queried past the last event pays for it.
	  private lazy val endOfInput: XMLEvent =
	    javax.xml.stream.XMLEventFactory.newInstance().createEndDocument()

	  /** True once all events have been consumed (also true for an empty event array). */
	  def atEnd: Boolean = index >= events.length

	  /** The event at the current position, or the end-of-document sentinel when at the end. */
	  def first: XMLEvent = if ( atEnd ) endOfInput else events(index)

	  /** The reader advanced by one event; at the end the reader returns itself. */
	  def rest: EventReader = if ( atEnd ) this else new EventReader(index + 1, events)

	  /** No position information is tracked for XML events. */
	  def pos = NoPosition
	}