Skip to content

Instantly share code, notes, and snippets.

@stew
Last active August 29, 2015 14:00
Show Gist options
  • Save stew/11145989 to your computer and use it in GitHub Desktop.
Save stew/11145989 to your computer and use it in GitHub Desktop.
package scalaz.example
object RunLengthEncoder extends App {
import scalaz._
import scalaz.std.option._
import scalaz.std.anyVal._
import scalaz.syntax.show._
import scalaz.syntax.apply._
/*
We create a simple langauage named CAB
Strings in the CAB language is a series of A, B, or C tokens followed by a EOF:
<cab> ::= 'C' | 'A' | 'B'
<string> ::= <cab> <string> | '.'
So valid strings in the langauge would be:
"."
"A."
"B."
"ABC."
"AAAABBCABC."
*/
sealed trait Token
case object C extends Token
case object A extends Token
case object B extends Token
object Token {
implicit val euqalsRef: Equal[Token] = Equal.equalRef
implicit val showTok: Show[Token] = new Show[Token] {
override def show(t: Token) = t match {
case A ⇒ Cord("A")
case B ⇒ Cord("B")
case C ⇒ Cord("C")
}
}
}
/*
We create an extension of the CAB language that allows compression via run length encoding. In compressed CAB, a token T can be prefixed by a number N to stand for "N Ts in a row":
<cab> ::= 'C' | 'A' | "B'
<tok> ::= <cab> | <integer> <cab>
<compressed> ::= <tok> <compressed> | "."
so the string "CAAAAAB." could be "compressed" any of the following ways:
"CAAAAAB."
"C5AB."
"C3AAAB."
"1C5CB."
*/
// running state
case class RunLengthState(
lastToken: Option[Token], // what is the last char we've seen
run: Int, // how many of that char have we seen
input: List[Token] // remaining input
) {
def incRun = this.copy(run = run + 1)
def newToken(t: Token) = RunLengthState(Some(t), 1, input)
def uncons: (Token, RunLengthState) = (input.head, this.copy(input = input.tail))
}
// the configuration of our encoder, this will be used in the Reader part of our RWST
case class RunLengthConfig(
/**
if we are emitting less than minRun tokens, just emit them as individual tokens instead of as a run of tokens
*/
minRun: Int
)
// the monoid we will use for logging
import RunLengthEncoder._
import Token._
import Free.Trampoline
type RunLength[A] = ReaderWriterStateT[Trampoline, RunLengthConfig, Cord, RunLengthState, A]
val rle = ReaderWriterStateT.rwstMonad[Trampoline, RunLengthConfig, Cord, RunLengthState]
import rle._
// read a token from the input
val readToken: RunLength[Token] =
for {
config ← get
(next, state) = config.uncons
_ ← put(state)
} yield next
/**
emit the lastToken
*/
def emit: RunLength[Unit] =
for {
state ← get
config ← ask
_ ← {
state.lastToken map { t ⇒
if(state.run <= config.minRun) {
tell(Monoid[Cord].multiply(t.show, state.run))
} else {
tell(state.run.show ++ t.show)
}
} getOrElse {
point(())
}
}
} yield ()
/**
have we exhausted the input?
*/
def done: RunLength[Boolean] = get flatMap { state ⇒
if(state.input.isEmpty) {
emit as true
} else point(false)
}
/**
emit tokens if the next input token is different than the last
*/
def maybeEmit: RunLength[Unit] =
for {
state ← get
next ← readToken
_ ← { if(state.lastToken.map(_ == next) getOrElse(false))
rle.modify(_.incRun)
else
(emit *> put(state.newToken(next)))
}
} yield ()
val (out, _, state) = untilM_(maybeEmit, done).run(RunLengthConfig(2),
RunLengthState(lastToken = None,
run = 0,
input = List(A,B,C,A,A,B,C,C,B,B,B,A,B,C,A,A,B))).run
println(out.shows)
println(state)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment