Skip to content

Instantly share code, notes, and snippets.

@sasaki-shigeo
Created January 30, 2014 01:23
Show Gist options
  • Save sasaki-shigeo/8700925 to your computer and use it in GitHub Desktop.
Save sasaki-shigeo/8700925 to your computer and use it in GitHub Desktop.
incomplete XML parser / Scala の parser combinator で XML の構文解析器を作ったが,まだ不完全
import scala.util.parsing.combinator._
/** Common supertype of every node the parser in this file can yield. */
abstract class XML
/**
 * An element node.
 *
 * @param name       tag name as it appeared in the start tag
 * @param attributes attribute name -> attribute value
 * @param contents   child nodes, in the order they were parsed
 */
case class Element(name: String, attributes: Map[String, String], contents: Seq[XML]) extends XML
/** A single character of ordinary text; the parser emits one node per character. */
case class Letter(c: Char) extends XML
/** The text found between `<![CDATA[` and `]]>`. */
case class CDATA(text: String) extends XML
/** The character denoted by a numeric character reference (`&#...;`). */
case class CharEntity(c: Char) extends XML
/** NOTE(review): presumably the character a named entity reference stands for (e.g. `&amp;`) - confirm intended use. */
case class Entity(c: Char) extends XML
/** NOTE(review): presumably the body text of an XML comment - confirm intended use. */
case class Comment(text: String) extends XML
/**
 * A small, intentionally partial XML parser built on `RegexParsers`.
 *
 * Supported: elements (paired and self-closing), attributes, character data,
 * CDATA sections, comments, numeric character references and the five
 * predefined entity references.
 *
 * Not supported: the XML prolog, DOCTYPE declarations, processing
 * instructions, references inside attribute values, non-ASCII names, and
 * code points above U+FFFF (the node types hold a single `Char`).
 *
 * The entry points parse exactly ONE content item (typically the root
 * element), optionally surrounded by whitespace.
 */
object parseXML extends RegexParsers {

  /**
   * Whitespace is significant inside character data and CDATA, so the
   * automatic whitespace skipping of `RegexParsers` is switched off and
   * whitespace inside tags is consumed explicitly instead.
   */
  override def skipWhitespace: Boolean = false

  /** Optional whitespace (XML `S?`). */
  private def optSpace: Parser[String] = """\s*""".r

  /** Mandatory whitespace (XML `S`). */
  private def space: Parser[String] = """\s+""".r

  /** The entity names every XML processor must recognise. */
  private val predefinedEntities: Map[String, Char] =
    Map("lt" -> '<', "gt" -> '>', "amp" -> '&', "apos" -> '\'', "quot" -> '"')

  /** Lexical form of a tag or attribute name (ASCII subset of the XML `Name` production). */
  def name: scala.util.matching.Regex = """[A-Za-z_:][-A-Za-z0-9_:.]*""".r

  /** Lexical form of an attribute value, INCLUDING its surrounding quotes. */
  def value: scala.util.matching.Regex = """'(.*?)'|"(.*?)"""".r

  /** `name = "value"`; yields the pair with the quotes stripped from the value. */
  def attribute: Parser[(String, String)] =
    (name <~ optSpace <~ "=" <~ optSpace) ~ value ^^ {
      // `value` always matches an opening and a closing quote, so length >= 2.
      case key ~ quoted => (key, quoted.substring(1, quoted.length - 1))
    }

  /** `<name attr="v" ...>`; yields the tag name and its attributes. */
  def startTag: Parser[(String, Map[String, String])] =
    "<" ~> name ~ rep(space ~> attribute) <~ optSpace <~ ">" ^^ {
      case tag ~ attrs => (tag, attrs.toMap)
    }

  /** `<name attr="v" ... />`; yields an element without children. */
  def emptyTag: Parser[Element] =
    "<" ~> name ~ rep(space ~> attribute) <~ optSpace <~ "/>" ^^ {
      case tag ~ attrs => Element(tag, attrs.toMap, Seq())
    }

  /** `</name>`; yields the tag name. */
  def endTag: Parser[String] = "</" ~> name <~ optSpace <~ ">"

  /**
   * Either a self-closing element or a start tag, any number of content
   * items and an end tag. Fails when the end tag names a different element
   * than the start tag.
   */
  def element: Parser[Element] = {
    val paired = startTag ~ rep(content) ~ endTag ^? (
      { case (open, attribs) ~ children ~ close if open == close => Element(open, attribs, children) },
      { case (open, _) ~ _ ~ close => s"end tag </$close> does not match start tag <$open>" }
    )
    emptyTag | paired
  }

  /** One item of element content. The alternatives start with disjoint prefixes. */
  def content: Parser[XML] = (
    comment
      | cdata
      | element
      | charEntity
      | entity
      | letter ^^ { (s: String) => Letter(s.charAt(0)) }
  )

  /** A single character of ordinary text: anything except `&`, `<` and `>`. */
  def letter: scala.util.matching.Regex = "[^&<>]".r

  /**
   * Numeric character reference: `&#NNN;` is decimal, `&#xHHH;` is
   * hexadecimal. References that do not fit into a single `Char` make the
   * parser fail instead of being silently truncated.
   */
  def charEntity: Parser[CharEntity] = {
    // BigInt avoids a NumberFormatException on absurdly long digit strings.
    def decode(digits: String, radix: Int): Option[Char] = {
      val codePoint = BigInt(digits, radix)
      if (codePoint <= BigInt(0xFFFF)) Some(codePoint.toInt.toChar) else None
    }
    val hex = "&#x" ~> "[0-9a-fA-F]+".r <~ ";" ^^ { (digits: String) => decode(digits, 16) }
    val dec = "&#" ~> "[0-9]+".r <~ ";" ^^ { (digits: String) => decode(digits, 10) }
    (hex | dec) ^? (
      { case Some(c) => CharEntity(c) },
      _ => "character reference does not fit into a single Char"
    )
  }

  /** Reference to one of the predefined entities: `&lt; &gt; &amp; &apos; &quot;`. */
  def entity: Parser[Entity] =
    ("&" ~> name <~ ";") ^? (
      { case ref if predefinedEntities.contains(ref) => Entity(predefinedEntities(ref)) },
      ref => s"unknown entity reference &$ref;"
    )

  /** `<![CDATA[ ... ]]>`; the text may contain `]` as long as it is not part of `]]>`. */
  def cdata: Parser[CDATA] =
    "<![CDATA[" ~> """(?s).*?(?=\]\]>)""".r <~ "]]>" ^^ { (text: String) => CDATA(text) }

  /** An XML comment; yields the text between the comment delimiters. */
  def comment: Parser[Comment] =
    "<!--" ~> """(?s).*?(?=-->)""".r <~ "-->" ^^ { (text: String) => Comment(text) }

  /**
   * Parses `source` as a single content item, tolerating surrounding
   * whitespace, and returns the parser result without printing anything.
   */
  def parseSource(source: String): ParseResult[XML] =
    parseAll(optSpace ~> content <~ optSpace, source)

  /** Parses `source` and prints the result (success or failure) to standard output. */
  def apply(source: String): Unit = println(parseSource(source))

  /** Command-line entry point: parses the first argument and prints the result. */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(source) =>
        println("input: " + source)
        // `apply` already prints the result; wrapping it in println would emit "()".
        apply(source)
      case None =>
        Console.err.println("usage: parseXML <xml-source>")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment