Created
September 12, 2012 20:25
-
-
Save kiritsuku/3709623 to your computer and use it in GitHub Desktop.
An attempt at writing a token-based parser combinator.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import scala.util.parsing.combinator.JavaTokenParsers | |
| import scala.util.matching.Regex | |
| import scala.util.parsing.input.OffsetPosition | |
| import Tokens._ | |
/**
 * Demo entry point: parses a small multi-line sample of `ident = ident`
 * assignments with the grammar from `P` and prints the resulting tokens.
 */
object T1 extends App with P {
  val source = "abc = def\nabcd=def\nabc\t= \t\t defg"

  // A failed parse still yields an empty result, but the reason is reported
  // on stderr instead of being silently discarded.
  val parsed = parseAll(lines, source) match {
    case Success(res, _) => res
    case NoSuccess(msg, next) =>
      Console.err.println("parse error at " + next.pos + ": " + msg)
      Nil
  }

  // Each parsed line is a nested `~` of six tokens; unpack it into a flat list.
  val flatten = parsed flatMap {
    case a ~ b ~ c ~ d ~ e ~ f => List(a, b, c, d, e, f)
  }

  // `whiteSpaces` may match the empty string, so drop the zero-length tokens.
  val tokens = flatten.filter(_.length != 0).toVector

  println(tokens)
}
/**
 * Grammar for a sequence of `identifier = identifier` assignments, built on
 * the token-producing `rx` parser inherited from `TParsers`.
 *
 * Implicit whitespace skipping is switched off so that whitespace is matched
 * explicitly by `whiteSpaces` and appears in the result as tokens.
 */
trait P extends TParsers {

  override val skipWhitespace = false

  /** A possibly empty run of whitespace, wrapped as a `WhiteSpaceToken`. */
  lazy val whiteSpaces: Parser[WhiteSpaceToken] =
    rx("""\s*""".r) ^^ { tok => WhiteSpaceToken(tok.value, tok.pos, tok.length) }

  /** A single `=` sign, wrapped as a `KeywordToken`. */
  lazy val eqs: Parser[KeywordToken] =
    rx("=".r) ^^ { tok => KeywordToken(tok.value, tok.pos, tok.length) }

  /** An identifier (letter or underscore, then word characters) as an `IdentToken`. */
  lazy val itok: Parser[IdentToken] =
    rx("""[a-zA-Z_]\w*""".r) ^^ { tok => IdentToken(tok.value, tok.pos, tok.length) }

  /** One assignment: identifier, `=`, identifier, with whitespace tokens around each part. */
  lazy val line =
    whiteSpaces ~ itok ~ whiteSpaces ~ eqs ~ whiteSpaces ~ itok

  /** Zero or more assignments in a row. */
  lazy val lines =
    rep(line)
}
/** Token model: a generic `Token` interface plus the concrete token kinds. */
object Tokens {

  /**
   * A lexical token.
   *
   * @tparam A type of the value carried by the token
   */
  trait Token[A] {
    /** The value carried by the token. */
    def value: A
    /** Position of the token in the source. */
    def pos: OffsetPosition
    /** Length of the token. */
    def length: Int
  }

  object Token {

    /** Creates an anonymous `Token` holding the given value, position and length. */
    def apply[A](tokValue: A, tokPos: OffsetPosition, tokLength: Int) =
      new Token[A] {
        val value = tokValue
        val pos = tokPos
        val length = tokLength
      }

    /** Extractor exposing any `Token` as a `(value, pos, length)` triple. */
    def unapply[A](t: Token[A]): Option[(A, OffsetPosition, Int)] = {
      val fields = (t.value, t.pos, t.length)
      Some(fields)
    }
  }

  /** Token for an identifier. */
  case class IdentToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]

  /** Token carrying an integer value. */
  case class IntToken(value: Int, pos: OffsetPosition, length: Int) extends Token[Int]

  /** Token for a keyword. */
  case class KeywordToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]

  /** Token for a run of whitespace. */
  case class WhiteSpaceToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]
}
/** Parsers that produce position-annotated `Token`s instead of bare strings. */
trait TParsers extends JavaTokenParsers {

  /**
   * Builds a parser that matches `r` as a prefix of the remaining input and
   * returns the matched text as a `Token`, together with the offset at which
   * the match starts and the match length.
   *
   * @param r regular expression to match at the current input position
   * @return a parser yielding the matched text as a `Token[String]`; on a
   *         mismatch it fails with a message naming the regex and what was found
   */
  def rx(r: Regex): Parser[Token[String]] = new Parser[Token[String]] {
    def apply(in: Input): ParseResult[Token[String]] = {
      val source = in.source
      val offset = in.offset
      // Start of the token after any whitespace handling done by the inherited
      // `handleWhiteSpace` (which honours `skipWhitespace`).
      val start = handleWhiteSpace(source, offset)
      r.findPrefixMatchOf(source.subSequence(start, source.length)) match {
        case Some(matched) =>
          val token = Token(
            source.subSequence(start, start + matched.end).toString,
            // Build the position directly rather than downcasting `Reader.pos`,
            // which is only an `OffsetPosition` for some reader implementations.
            OffsetPosition(source, start),
            matched.end
          )
          Success(token, in.drop(start + matched.end - offset))
        case None =>
          val found =
            if (start == source.length) "end of source"
            else "`" + source.charAt(start) + "'"
          Failure("string matching regex `" + r + "' expected but " + found + " found", in.drop(start - offset))
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment