Created
January 30, 2014 01:23
-
-
Save sasaki-shigeo/8700925 to your computer and use it in GitHub Desktop.
incomplete XML parser / Scala の parser combinator で XML の構文解析器を作ったが,まだ不完全
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import scala.util.parsing.combinator._ | |
/** Base type of every node in the parsed XML tree.
  *
  * The hierarchy is sealed so that pattern matches over `XML` values are
  * checked for exhaustiveness by the compiler.
  */
sealed abstract class XML

/** An element node.
  *
  * @param name       the tag name
  * @param attributes attribute names mapped to their values
  * @param contents   the child nodes, in document order
  */
final case class Element(name: String, attributes: Map[String, String], contents: Seq[XML]) extends XML

/** A single character of text content. */
final case class Letter(c: Char) extends XML

/** The text found inside a `<![CDATA[ ... ]]>` section. */
final case class CDATA(text: String) extends XML

/** A numeric character reference (`&#...;`) resolved to the character it denotes. */
final case class CharEntity(c: Char) extends XML

/** A character obtained from an entity reference.
  *
  * NOTE(review): presumably intended for named entities such as `&amp;` - confirm.
  */
final case class Entity(c: Char) extends XML

/** A comment node carrying its text.
  *
  * NOTE(review): presumably the body of a `<!-- ... -->` comment - confirm.
  */
final case class Comment(text: String) extends XML
/** A parser for a small subset of XML, built on Scala's parser combinators.
  *
  * Supported: elements with attributes, empty-element tags, CDATA sections,
  * numeric character references, the five predefined entities and plain text.
  *
  * Limitation: `RegexParsers` skips whitespace before every token by default,
  * so whitespace inside text content is not preserved. Comments, processing
  * instructions and DOCTYPE declarations are not handled.
  */
object parseXML extends RegexParsers {

  /** The five entities predefined by XML and the characters they stand for. */
  private val predefinedEntities: Map[String, Char] =
    Map("amp" -> '&', "lt" -> '<', "gt" -> '>', "apos" -> '\'', "quot" -> '"')

  /** A tag or attribute name (ASCII subset of XML names: letters, digits, `_`, `:`, `.`, `-`). */
  def name: Parser[String] = """[A-Za-z_:][A-Za-z0-9_:.\-]*""".r

  /** A quoted attribute value; the result excludes the surrounding quotes. */
  def value: Parser[String] =
    "'[^']*'|\"[^\"]*\"".r ^^ { quoted => quoted.substring(1, quoted.length - 1) }

  /** A single `name = value` attribute, yielded as a (name, value) pair. */
  def attribute: Parser[(String, String)] =
    name ~ ("=" ~> value) ^^ { case key ~ v => (key, v) }

  /** A start tag `<name attr...>`, yielded as (name, attributes). */
  def startTag: Parser[(String, Map[String, String])] =
    (("<" ~> name) ~ rep(attribute) <~ ">") ^^ { case tag ~ attrs => (tag, attrs.toMap) }

  /** An empty-element tag `<name attr.../>`, yielded as an [[Element]] without contents. */
  def emptyTag: Parser[Element] =
    (("<" ~> name) ~ rep(attribute) <~ "/>") ^^ { case tag ~ attrs => Element(tag, attrs.toMap, Seq()) }

  /** An end tag `</name>`, yielding the tag name. */
  def endTag: Parser[String] = "</" ~> name <~ ">"

  /** A start tag, any number of content items and the matching end tag.
    *
    * Fails when the end tag's name differs from the start tag's name.
    */
  def element: Parser[Element] =
    (startTag ~ rep(content) ~ endTag).^?(
      { case (open, attribs) ~ children ~ close if open == close => Element(open, attribs, children) },
      { case (open, _) ~ _ ~ close => s"end tag </$close> does not match start tag <$open>" }
    )

  /** Any single item that may appear inside an element. */
  def content: Parser[XML] = (
      element
    | emptyTag
    | cdata
    | charEntity
    | entity
    | letter ^^ { s => Letter(s.charAt(0)) }
  )

  /** One character of text: anything except `&`, `<` and `>`. */
  def letter: Parser[String] = "[^&<>]".r

  /** Parses `prefix digits ;` and converts the digits, read in `radix`, to a character.
    *
    * Fails (instead of throwing or truncating) when the number does not fit in a `Char`.
    */
  private def charRef(prefix: String, digits: Parser[String], radix: Int): Parser[CharEntity] =
    (prefix ~> digits <~ ";") >> { ds =>
      scala.util.Try(Integer.parseInt(ds, radix)).toOption.filter(_ <= Char.MaxValue) match {
        case Some(code) => success(CharEntity(code.toChar))
        case None       => failure(s"character reference out of range: $ds")
      }
    }

  /** A numeric character reference: hexadecimal `&#xHHHH;` or decimal `&#DDDD;`. */
  def charEntity: Parser[CharEntity] =
    charRef("&#x", "[0-9a-fA-F]+".r, 16) | charRef("&#", "[0-9]+".r, 10)

  /** One of the predefined entities `&amp;`, `&lt;`, `&gt;`, `&apos;`, `&quot;`. */
  def entity: Parser[Entity] =
    ("&" ~> "[A-Za-z]+".r <~ ";").^?(
      { case ref if predefinedEntities.contains(ref) => Entity(predefinedEntities(ref)) },
      ref => s"unknown entity: &$ref;"
    )

  /** A CDATA section; its text runs up to the first `]]>` and may contain single `]` characters. */
  def cdata: Parser[CDATA] =
    ("<![CDATA[" ~> """(?s).*?(?=\]\]>)""".r <~ "]]>") ^^ { CDATA(_) }

  /** Parses `source` as a single content item and prints the parse result.
    *
    * @param source the XML text to parse
    */
  def apply(source: String): Unit = println(parseAll(content, source))

  /** Command-line entry point: parses the first argument and prints the result.
    *
    * Prints a usage message to standard error when no argument is given.
    */
  def main(args: Array[String]): Unit =
    args.headOption match {
      case Some(source) =>
        println("input: " + source)
        // apply already prints the result; printing its Unit value would emit a stray "()".
        apply(source)
      case None =>
        Console.err.println("usage: parseXML <xml-text>")
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment