Skip to content

Instantly share code, notes, and snippets.

@yanana
Forked from satabin/Monadic.scala
Created October 8, 2015 09:18
Show Gist options
  • Select an option

  • Save yanana/df068e4a350a6af6b987 to your computer and use it in GitHub Desktop.

Select an option

Save yanana/df068e4a350a6af6b987 to your computer and use it in GitHub Desktop.
better monadic parser combinator with lazy evaluation for better performance
package gnieh.tex
import scala.language.higherKinds
/** A generic monadic interface. By implementing this trait, one can use
* this data together in for-comprehensions
*
* @author Lucas Satabin
*/
trait Monadic[+T, M[+T] <: Monadic[T, M]] {
self =>
class WithFilter(p: T => Boolean) {
@inline
def map[U](f: T => U): M[U] =
self filter p map f
@inline
def flatMap[U](f: T => M[U]): M[U] =
self filter p flatMap f
/*@inline
def foreach(f: T => Unit): Unit =
self filter p foreach f*/
def withFilter(q: T => Boolean): WithFilter =
new WithFilter(t => p(t) && q(t))
}
def map[U](f: T => U): M[U]
def flatMap[U](f: T => M[U]): M[U]
//def foreach(f: T => Unit): Unit
def filter(p: T => Boolean): M[T]
def withFilter(p: T => Boolean): WithFilter =
new WithFilter(p)
}
package gnieh.tex
import scala.util.parsing.input.Position
/** Parser combinators implementation based on the paper *Parsec: Direct Style Monadic Parser Combinators For The Real World*
*
* @author Lucas Satabin
*/
trait Parsec[Token, Pos <: Position] {
/** Computes the next position from the current position and read token */
protected def nextPos(current: Pos, read: Token): Pos
/** The result returned by a paser.
*
* @author Lucas Satabin
*/
sealed abstract class Result[+T] {
def reply: Reply[T]
override def toString = reply.toString
}
/** A result that actually consumed some tokens in the input stream
*
* @author Lucas Satabin
*/
class Consumed[T](rep: =>Reply[T]) extends Result[T] {
// laziness of reply parameter allows for efficient choice implementation
lazy val reply = rep
}
/** Companion object to build and extract data
*
* @author Lucas Satabin
*/
object Consumed {
def apply[T](rep: =>Reply[T]) = new Consumed(rep)
def unapply[T](res: Result[T]): Option[Reply[T]] = res match {
case cons: Consumed[T] => Some(cons.reply)
case _ => None
}
}
/** A result that did not consume any token in input stream
*
* @author Lucas Satabin
*/
case class Empty[T](reply: Reply[T]) extends Result[T]
/** A positioned error message with an optional encountered unexpected token and a
* list of expected messages.
*
* @author Lucas Satabin
*/
case class Message(pos: Pos, unexpected: Option[Token], expected: List[String]) {
override def toString = {
val found = unexpected map (_.toString) getOrElse "EOI"
val expect = expected.mkString("expected: ", " or ", "")
"Error at " + pos + "\n" + pos.longString + "\nfound: " + found + "\n" + expect
}
}
/** An actual parsing result with a (possibly empty) error message
*
* @author Lucas Satabin
*/
sealed trait Reply[+T] extends Monadic[T, Reply] {
val message: Message
}
/** A parsing result that corresponds to a success
*
* @author Lucas Satabin
*/
case class Success[T](value: T, rest: Input, message: Message) extends Reply[T] {
def filter(p: T => Boolean): Reply[T] =
if(p(value))
this
else
Error(message)
def flatMap[U](f: T => Reply[U]): Reply[U] =
f(value)
def map[U](f: T => U): Reply[U] =
Success(f(value), rest, message)
override def toString = value.toString
}
/** A parsing result that corresponds to an error
*
* @author Lucas Satabin
*/
case class Error(message: Message) extends Reply[Nothing] {
def filter(p: Nothing => Boolean): Reply[Nothing] =
this
def flatMap[U](f: Nothing => Reply[U]): Reply[U] =
this
def map[U](f: Nothing => U): Reply[U] =
this
override def toString = message.toString
}
/** The input state with input stream and current position
*
* @author Lucas Satabin
*/
case class Input(stream: Stream[Token], pos: Pos)
/** A monadic parser (function from input to result)
*
* @author Lucas Satabin
*/
abstract class Parser[+T] extends (Input => Result[T]) with Monadic[T, Parser] {
self =>
/** Binds the result of this parser to a function producing another parser.
* This is the basic operation used to build sequences. */
def >>=[U](fun: T => Parser[U]): Parser[U] =
new Parser[U] {
def apply(input: Input): Result[U] = self(input) match {
case Empty(reply1) => reply1 match {
case Success(value, rest, msg) =>
// this parser succeeded without consuming any token
fun(value)(rest)
case Error(msg) =>
// this parser failed to be applied
Empty(Error(msg))
}
case Consumed(reply1) =>
// this parser consumed some token, immediately return a `Consumed` token
// with lazy second result parameter
Consumed(reply1 match {
case Success(value, rest, _) => fun(value)(rest).reply
case Error(msg) => Error(msg)
})
}
}
/** Applies transformation on the value resulting from this parser. */
def fmap[U](fun: T => U): Parser[U] =
new Parser[U] {
def apply(input: Input): Result[U] = self(input) match {
case Consumed(Success(value, rest, msg)) =>
Consumed(Success(fun(value), rest, msg))
case Consumed(Error(msg)) =>
Consumed(Error(msg))
case Empty(Success(value, rest, msg)) =>
Empty(Success(fun(value), rest, msg))
case Empty(Error(msg)) =>
Empty(Error(msg))
}
}
/** Synonym for `>>=`, making it possible to use for-comprehension on parsers. */
@inline
def flatMap[U](fun: T => Parser[U]): Parser[U] =
this >>= fun
/** Synonym for `fmap` making it possible to use for-comprehension on parsers. */
@inline
def map[U](fun: T => U): Parser[U] =
fmap(fun)
/** Parser that succeeds if `this` parser succeeds and the predicates holds on the
* produced value. if `this` parser succeeds but the predicate is not satisfied,
* an error result is returned. If `this` failed, the original failing result is
* returned */
def filter(p: T => Boolean): Parser[T] =
new Parser[T] {
def apply(input: Input): Result[T] = self(input) match {
case res @ Empty(Success(value, _, _)) if p(value) =>
res
case res @ Consumed(Success(value, _, _)) if p(value) =>
res
case Empty(Success(_, _, _)) | Consumed(Success(_, _, _)) =>
Empty(Error(Message(input.pos, input.stream.headOption, Nil)))
case other =>
other
}
}
/** Deterministic choice. If this parser fails, the second is tried.
* It implements the 'longest match' rule if `this` parser succeeds without consuming
* any input, and `that` succeeds by consuming some input, then the second result is taken */
def <|>[U >: T](that: Parser[U]): Parser[U] =
new Parser[U] {
def apply(input: Input): Result[U] = self(input) match {
case Empty(Error(msg1)) =>
that(input) match {
case Empty(Error(msg2)) =>
mergeError(msg1, msg2)
case Empty(Success(token, rest, msg2)) =>
mergeSuccess(token, rest, msg1, msg2)
case consumed =>
consumed
}
case Empty(Success(value, rest, msg1)) =>
that(input) match {
case Empty(Error(msg2)) =>
mergeSuccess(value, rest, msg1, msg2)
case Empty(Success(_, _, msg2)) =>
mergeSuccess(value, rest, msg1, msg2)
case consumed =>
consumed
}
case consumed =>
consumed
}
}
/** Parser that accepts the same input as `this` parser, but set the expected productions
* to `msg` when `this` fails without consuming any input. */
def <#>(msg: String): Parser[T] =
new Parser[T] {
def apply(input: Input) = self(input) match {
case Empty(Error(Message(pos, unexp, _))) =>
Empty(Error(Message(pos, unexp, List(msg))))
case res =>
res
}
}
}
/** Parser that always succeeds without consuming any value */
def success[T](value: T): Parser[T] =
new Parser[T] {
def apply(input: Input): Result[T] =
// ok without consuming anything
Empty(Success(value, input, Message(input.pos, None, Nil)))
}
/** Parser that always fails with the given message and wihout consuming any input */
def fail(msg: String): Parser[Nothing] =
new Parser[Nothing] {
def apply(input: Input): Result[Nothing] =
Empty(Error(Message(input.pos, input.stream.headOption, List(msg))))
}
/** Parser that succeeds if the next token satisfies the predicate.
* The token is consumed when the parser succeeds */
def satisfy(p: Token => Boolean): Parser[Token] =
new Parser[Token] {
def apply(input: Input): Result[Token] = input.stream match {
case token #:: rest if p(token) =>
val pos1 = nextPos(input.pos, token)
val input1 = Input(rest, pos1)
Consumed(Success(token, input1, Message(input.pos, None, Nil)))
case token #:: rest =>
// the token does not satisfy the predicate
Empty(Error(Message(input.pos, Some(token), Nil)))
case _ =>
Empty(Error(Message(input.pos, None, Nil)))
}
}
/** Parser that succeeds just like `p`, but pretends that no input was consumed when `p` fails */
def attempt[T](p: =>Parser[T]): Parser[T] =
new Parser[T] {
def apply(input: Input): Result[T] = p(input) match {
case Consumed(Error(msg)) => Empty(Error(msg))
case res => res
}
}
/** Parser that succeeds with a list of at least one element recognized by parser `p` */
def many1[T](p: =>Parser[T]): Parser[List[T]] = {
lazy val p1 = p
for {
x <- p1
xs <- many1(p1) <|> success(List())
} yield x :: xs
}
def run[T](p: Parser[T], in: Input): Reply[T] = p(in).reply
// ========== internals ==========
/* merges both error messages into one error result */
private def mergeError(msg1: Message, msg2: Message): Empty[Nothing] =
Empty(Error(merge(msg1, msg2)))
/* merges both message as a succesful empty result */
private def mergeSuccess[T](value: T, rest: Input, msg1: Message, msg2: Message): Empty[T] =
Empty(Success(value, rest, merge(msg1, msg2)))
private def merge(msg1: Message, msg2: Message) = (msg1, msg2) match {
case (Message(pos, tok, expected1), Message(_, _, expected2)) =>
Message(pos, tok, expected1 ++ expected2)
}
}
package gnieh.tex
import scala.util.parsing.input.Position
/** A position that has no context in the stream for what was read before and
* what comes next. It is only aware of the current token
*
* @author Lucas Satabin
*/
case class StreamPosition[T](source: Stream[T], offset: Int) extends Position {
val line = 1
val column = offset + 1
def lineContents: String =
source.headOption.map(_.toString).getOrElse("<EOI>")
/** Returns a string representation of the `Position`, of the form `line.column`. */
override def toString = "@" + offset
override def <(that: Position) = that match {
case StreamPosition(_, that_offset) =>
this.offset < that_offset
case _ =>
super.<(that)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment