Created
April 3, 2013 22:44
-
-
Save travisbrown/5306155 to your computer and use it in GitHub Desktop.
Quick parser combinator demo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sealed trait Element | |
case class Tag(name: String, attr: String) extends Element | |
case class Header(text: String) extends Element | |
case class Plain(text: String) extends Element | |
import scala.util.parsing.combinator._ | |
object parse extends RegexParsers { | |
override def skipWhitespace = false | |
def tag: Parser[Tag] = ("<" ~> "\\S+".r <~ "\\s".r) ~ "[^\\s>]+".r <~ ">" ^^ { | |
case name ~ attr => Tag(name, attr) | |
} | |
def header: Parser[Header] = "\\-".r ~> "[^+]*".r <~ "+" ^^ (Header(_)) | |
def plain: Parser[Plain] = "[^<>\\-\\+]+".r ^^ (Plain(_)) | |
def elements: Parser[List[Element]] = rep(tag | header | plain) | |
def apply(s: String) = parseAll(elements, s) | |
} | |
val text = io.Source.fromFile( | |
"corrections/edF.txt" | |
).getLines.take(10000).mkString(" ") | |
parse(text).get foreach println |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment