Last active
August 29, 2015 14:00
-
-
Save stew/11145989 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package scalaz.example | |
object RunLengthEncoder extends App { | |
import scalaz._ | |
import scalaz.std.option._ | |
import scalaz.std.anyVal._ | |
import scalaz.syntax.show._ | |
import scalaz.syntax.apply._ | |
/* | |
We create a simple langauage named CAB | |
Strings in the CAB language is a series of A, B, or C tokens followed by a EOF: | |
<cab> ::= 'C' | 'A' | 'B' | |
<string> ::= <cab> <string> | '.' | |
So valid strings in the langauge would be: | |
"." | |
"A." | |
"B." | |
"ABC." | |
"AAAABBCABC." | |
*/ | |
sealed trait Token | |
case object C extends Token | |
case object A extends Token | |
case object B extends Token | |
object Token { | |
implicit val euqalsRef: Equal[Token] = Equal.equalRef | |
implicit val showTok: Show[Token] = new Show[Token] { | |
override def show(t: Token) = t match { | |
case A ⇒ Cord("A") | |
case B ⇒ Cord("B") | |
case C ⇒ Cord("C") | |
} | |
} | |
} | |
/* | |
We create an extension of the CAB language that allows compression via run length encoding. In compressed CAB, a token T can be prefixed by a number N to stand for "N Ts in a row": | |
<cab> ::= 'C' | 'A' | "B' | |
<tok> ::= <cab> | <integer> <cab> | |
<compressed> ::= <tok> <compressed> | "." | |
so the string "CAAAAAB." could be "compressed" any of the following ways: | |
"CAAAAAB." | |
"C5AB." | |
"C3AAAB." | |
"1C5CB." | |
*/ | |
// running state | |
case class RunLengthState( | |
lastToken: Option[Token], // what is the last char we've seen | |
run: Int, // how many of that char have we seen | |
input: List[Token] // remaining input | |
) { | |
def incRun = this.copy(run = run + 1) | |
def newToken(t: Token) = RunLengthState(Some(t), 1, input) | |
def uncons: (Token, RunLengthState) = (input.head, this.copy(input = input.tail)) | |
} | |
// the configuration of our encoder, this will be used in the Reader part of our RWST | |
case class RunLengthConfig( | |
/** | |
if we are emitting less than minRun tokens, just emit them as individual tokens instead of as a run of tokens | |
*/ | |
minRun: Int | |
) | |
// the monoid we will use for logging | |
import RunLengthEncoder._ | |
import Token._ | |
import Free.Trampoline | |
type RunLength[A] = ReaderWriterStateT[Trampoline, RunLengthConfig, Cord, RunLengthState, A] | |
val rle = ReaderWriterStateT.rwstMonad[Trampoline, RunLengthConfig, Cord, RunLengthState] | |
import rle._ | |
// read a token from the input | |
val readToken: RunLength[Token] = | |
for { | |
config ← get | |
(next, state) = config.uncons | |
_ ← put(state) | |
} yield next | |
/** | |
emit the lastToken | |
*/ | |
def emit: RunLength[Unit] = | |
for { | |
state ← get | |
config ← ask | |
_ ← { | |
state.lastToken map { t ⇒ | |
if(state.run <= config.minRun) { | |
tell(Monoid[Cord].multiply(t.show, state.run)) | |
} else { | |
tell(state.run.show ++ t.show) | |
} | |
} getOrElse { | |
point(()) | |
} | |
} | |
} yield () | |
/** | |
have we exhausted the input? | |
*/ | |
def done: RunLength[Boolean] = get flatMap { state ⇒ | |
if(state.input.isEmpty) { | |
emit as true | |
} else point(false) | |
} | |
/** | |
emit tokens if the next input token is different than the last | |
*/ | |
def maybeEmit: RunLength[Unit] = | |
for { | |
state ← get | |
next ← readToken | |
_ ← { if(state.lastToken.map(_ == next) getOrElse(false)) | |
rle.modify(_.incRun) | |
else | |
(emit *> put(state.newToken(next))) | |
} | |
} yield () | |
val (out, _, state) = untilM_(maybeEmit, done).run(RunLengthConfig(2), | |
RunLengthState(lastToken = None, | |
run = 0, | |
input = List(A,B,C,A,A,B,C,C,B,B,B,A,B,C,A,A,B))).run | |
println(out.shows) | |
println(state) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment