Skip to content

Instantly share code, notes, and snippets.

@kiritsuku
Created September 12, 2012 20:25
Show Gist options
  • Select an option

  • Save kiritsuku/3709623 to your computer and use it in GitHub Desktop.

Select an option

Save kiritsuku/3709623 to your computer and use it in GitHub Desktop.
An attempt at writing a token-based parser combinator.
import scala.util.parsing.combinator.JavaTokenParsers
import scala.util.matching.Regex
import scala.util.parsing.input.OffsetPosition
import Tokens._
/** Demo entry point: parses a small sample of `name = value` lines with the
  * grammar mixed in from `P` and prints every non-empty token it produced.
  */
object T1 extends App with P {
  val source = "abc = def\nabcd=def\nabc\t= \t\t defg"

  // One element per parsed line, each of the shape
  // whitespace ~ identifier ~ whitespace ~ "=" ~ whitespace ~ identifier.
  val parsed = parseAll(lines, source) match {
    case Success(res, _) => res
    case NoSuccess(msg, next) =>
      // Report why parsing failed instead of silently producing no tokens.
      // The result is still an empty list, so the rest of the script runs as before.
      Console.err.println(s"parse error at ${next.pos}: $msg")
      Nil
  }

  // Flatten every parsed line into its six tokens, in source order.
  val flatten = parsed flatMap {
    case a ~ b ~ c ~ d ~ e ~ f => List(a, b, c, d, e, f)
  }

  // Zero-length tokens come from the optional whitespace parser matching nothing.
  val tokens = flatten.filter(_.length != 0).toVector

  println(tokens)
}
/** Grammar for a sequence of `identifier = identifier` assignments in which
  * whitespace is kept as explicit tokens instead of being skipped.
  */
trait P extends TParsers {
  // Turn off the automatic whitespace skipping of the underlying regex parsers
  // so that whitespace is only consumed by the `whiteSpaces` parser below.
  override val skipWhitespace = false

  /** A possibly empty run of whitespace (`\s*`, so line breaks are included). */
  lazy val whiteSpaces =
    rx("""\s*""".r) ^^ (ws => WhiteSpaceToken(ws.value, ws.pos, ws.length))

  /** The `=` sign, tagged as a keyword token. */
  lazy val eqs =
    rx("=".r) ^^ (kw => KeywordToken(kw.value, kw.pos, kw.length))

  /** An identifier: a letter or underscore followed by word characters. */
  lazy val itok =
    rx("""[a-zA-Z_]\w*""".r) ^^ (id => IdentToken(id.value, id.pos, id.length))

  /** One assignment; every gap between the parts may hold whitespace. */
  lazy val line =
    whiteSpaces ~ itok ~ whiteSpaces ~ eqs ~ whiteSpaces ~ itok

  /** Zero or more assignments in a row. */
  lazy val lines =
    rep(line)
}
/** Token model used by the parsers in this file: a generic `Token` abstraction
  * plus the concrete token kinds.
  */
object Tokens {

  /** A lexical token carrying a value of type `A`.
    *
    * @tparam A type of the value carried by the token
    */
  trait Token[A] {
    /** The value carried by this token. */
    def value: A

    /** Position of the token in the source. */
    def pos: OffsetPosition

    /** Length of the token. */
    def length: Int
  }

  object Token {

    /** Creates a token of no particular kind from its three components.
      *
      * @param tokValue  value carried by the token
      * @param tokPos    position of the token in the source
      * @param tokLength length of the token
      * @return an anonymous `Token[A]` exposing exactly the given components
      */
    def apply[A](tokValue: A, tokPos: OffsetPosition, tokLength: Int): Token[A] =
      new Token[A] {
        val value: A = tokValue
        val pos: OffsetPosition = tokPos
        val length: Int = tokLength

        // Anonymous instances would otherwise print as an opaque class name and hash.
        override def toString: String = s"Token($value, $pos, $length)"
      }

    /** Extractor that exposes the value, position and length of any token. */
    def unapply[A](t: Token[A]): Option[(A, OffsetPosition, Int)] =
      // Explicit tuple: do not rely on argument auto-tupling, which the compiler warns about.
      Some((t.value, t.pos, t.length))
  }

  /** An identifier token. */
  case class IdentToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]

  /** An integer literal token. */
  case class IntToken(value: Int, pos: OffsetPosition, length: Int) extends Token[Int]

  /** A keyword token. */
  case class KeywordToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]

  /** A token holding a run of whitespace. */
  case class WhiteSpaceToken(value: String, pos: OffsetPosition, length: Int) extends Token[String]
}
/** Regex parsers that yield position-carrying `Token`s instead of plain strings. */
trait TParsers extends JavaTokenParsers {

  /** Builds a parser that matches `r` as a prefix of the remaining input.
    *
    * On success the result is a `Token` holding the matched text, the position
    * at which the match starts and the number of matched characters; the
    * remaining input starts right after the match. On failure the reported
    * input position is the one at which matching was attempted.
    *
    * @param r the regular expression to match
    * @return a parser producing a `Token[String]` for the matched text
    */
  def rx(r: Regex): Parser[Token[String]] = new Parser[Token[String]] {
    def apply(in: Input): ParseResult[Token[String]] = {
      val source = in.source
      val offset = in.offset
      // Where matching actually begins, after whatever handleWhiteSpace decides to skip.
      val start = handleWhiteSpace(source, offset)

      r.findPrefixMatchOf(source.subSequence(start, source.length)) match {
        case Some(matched) =>
          val end = start + matched.end
          val token = Token(
            source.subSequence(start, end).toString,
            // Build the position directly rather than downcasting the reader's
            // `pos`: a reader is not required to return an OffsetPosition, and
            // the cast would then throw a ClassCastException.
            OffsetPosition(source, start),
            matched.end
          )
          Success(token, in.drop(end - offset))

        case None =>
          val found =
            if (start == source.length()) "end of source"
            else "`" + source.charAt(start) + "'"
          Failure(
            "string matching regex `" + r + "' expected but " + found + " found",
            in.drop(start - offset)
          )
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment