Created
July 11, 2015 20:42
-
-
Save gauthamnair/cacd07b3b0f1a4ff72d0 to your computer and use it in GitHub Desktop.
transducers tic-tac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//-- | |
// Clojure's Transducers | |
// (the simpler parts) | |
//-- | |
// References: | |
// Transducers by Rich Hickey on youtube | |
// There are also scala implementations in existence: | |
// (which are differently implemented from what we do here) | |
// https://github.com/knutwalker/transducers-scala | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
val result = | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) | |
//-- | |
// we've implemented | |
def f(xs: List[String]): List[Int] | |
// + | |
// its composable | |
// direct for this use-case | |
// - | |
// needlessly specific | |
// allocates a new container every time. | |
//-- | |
// approach 1: | |
// Iterator => Iterator | |
//-- | |
def f(xs: Iterator[String]): Iterator[Int] = | |
xs | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) | |
//-- | |
// build a driver | |
def run[A, B, F[_] <: Iterable[_]]( | |
xs: F[A], f: Iterator[A] => Iterator[B]): F[B] | |
//-- | |
// oh, right its not that simple :) | |
//-- | |
import scala.collection.generic.CanBuildFrom | |
def run[A, F[I] <: Iterable[I], B, That]( | |
xs: F[A], | |
f: Iterator[A] => Iterator[B])( | |
implicit cbf: CanBuildFrom[F[A],B,That]): That | |
//-- | |
// build a driver | |
def run[A, F[I] <: Iterable[I], B, That]( | |
xs: F[A], | |
f: Iterator[A] => Iterator[B])( | |
implicit cbf: CanBuildFrom[F[A],B,That]): That = { | |
val bIter: Iterator[B] = f(xs.iterator) | |
val builder = cbf(xs) | |
bIter.foreach{ builder += _ } | |
builder.result | |
} | |
//-- | |
// build a driver | |
def run[A, F[I] <: Iterable[I], B, That]( | |
xs: F[A], | |
f: Iterator[A] => Iterator[B])( | |
implicit cbf: CanBuildFrom[F[A],B,That]): That = { | |
... | |
} | |
run(List("1,2", "3,4,5", "NA", "6", "NA"), f) | |
// result: List(1, 2, 3, 4, 5, 6) | |
//-- | |
// approach 1: | |
// Process = Iterator => Iterator | |
// + | |
// familiar | |
// composeable | |
// no extra allocations | |
//-- | |
// approach 1: | |
// Process[A,B] = Iterator[A] => Iterator[B] | |
// + | |
// familiar | |
// composeable | |
// no extra allocations | |
// - | |
// unusable on real-time streams. | |
//-- | |
// a real time stream is not an iterator | |
realTimeStream.hasNext // maybe, maybe not? | |
//-- | |
// let's look at it one input at a time. | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
//-- | |
// let's look at it one input at a time. | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
// we can describe this transformation with: | |
def f(x: String): Iterator[Int] | |
//-- | |
// approach 2: | |
// Process[A,B] = A => Iterator[B] | |
trait Transformer[-A,+B] | |
extends Function1[A,Iterator[B]] | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
val process: Transformer[String,Int] = | |
filter[String](_ != "NA") andThen | |
mapCat[String, String](_.split(",")) andThen | |
map(_.toInt) | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
val process: Transformer[String,Int] = | |
filter[String](_ != "NA") andThen | |
mapCat[String, String](_.split(",")) andThen | |
map(_.toInt) | |
input.flatMap(process) | |
// result: List(1, 2, 3, 4, 5, 6) | |
//-- | |
// the only difference between Transformer[A,B] | |
// & regular A => Iterator[B] | |
// is convenience for composition | |
val first: A => Iterator[B] | |
val second: B => Iterator[C] | |
// want | |
val sequenced: A => Iterator[C] | |
//-- | |
trait Transformer[-A,+B] | |
extends Function1[A,Iterator[B]] { self => | |
def andThen[C](next: Transformer[B,C]): Transformer[A,C] = | |
new Transformer[A,C] { | |
def apply(a: A): Iterator[C] = | |
self.apply(a).flatMap(next) | |
} | |
} | |
//-- | |
def map[A,B](f: A => B): Transformer[A,B] = | |
new Transformer[A,B] { | |
def apply(a:A) = Iterator(f(a)) | |
} | |
//-- | |
def filter[A](f: A => Boolean): Transformer[A,A] = | |
new Transformer[A,A] { | |
def apply(a:A) = Iterator(a).filter(f) | |
} | |
//-- | |
def mapCat[A,B](f: A => TraversableOnce[B]): Transformer[A,B] = | |
new Transformer[A,B] { | |
def apply(a:A) = f(a).toIterator | |
} | |
//-- | |
// approach 2: | |
// Transformer[A,B] = A => Iterator[B] | |
// + | |
// fairly familiar | |
// composeable | |
// highly generic | |
// - | |
//-- | |
// approach 2: | |
// Transformer[A,B] = A => Iterator[B] | |
// + | |
// fairly familiar | |
// composeable | |
// generic | |
// - | |
// allocations!! | |
//-- | |
// Transformer[A,B] = A => Iterator[B] | |
// - | |
// allocations!! | |
filter[String](_ != "NA") andThen | |
mapCat[String, String](_.split(",")) andThen | |
map(_.toInt) | |
//-- | |
// Clojure's transducers | |
// + | |
// fairly familiar | |
// composeable | |
// generic | |
// no extra allocations | |
//-- | |
// Let's add a step to the problem. | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = List(1,2,3,4,5,6) | |
//-- | |
// Let's add a step to the problem. | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
// val expectedResult = List(1,2,3,4,5,6) | |
val expectedResult = 1 + 2 + 3 + 4 + 5 + 6 | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
// val expectedResult = List(1,2,3,4,5,6) | |
val expectedResult = 1 + 2 + 3 + 4 + 5 + 6 // 21 | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val expectedResult = 1 + 2 + 3 + 4 + 5 + 6 // 21 | |
val result = | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) // List[Int] | |
.foldLeft(0)(_ + _) // 21 | |
//-- | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) // List[Int] | |
.foldLeft(0)(useInt) // 21 | |
//-- | |
def useStringInt(soFar: Int, strInt: String): Int = | |
soFar + strInt.toInt | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) // List[Int] | |
.foldLeft(0)(useInt) // 21 | |
//-- | |
def useStringInt(soFar: Int, strInt: String): Int = | |
soFar + strInt.toInt | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) // List[String] | |
.foldLeft(0)(useStringInt) // 21 | |
//-- | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
strInt.split(",").foldLeft(soFar)(_ + _) | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) // List[String] | |
.foldLeft(0)(useStringInt) // 21 | |
//-- | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
input | |
.filter(_ != "NA") // List[String] | |
.foldLeft(0)(useCSVStringInt) // 21 | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
str match { | |
case "NA" => soFar | |
case csvStrInt => csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
} | |
input | |
.filter(_ != "NA") // List[String] | |
.foldLeft(0)(useCSVStringInt) // 21 | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
str match { | |
case "NA" => soFar | |
case csvStrInt => csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
} | |
input | |
.foldLeft(0)(useNullableCSVStringInt) // 21 | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
str match { | |
case "NA" => soFar | |
case csvStrInt => csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
} | |
input | |
.foldLeft(0)(useNullableCSVStringInt) // 21 | |
input | |
.filter(_ != "NA") | |
.flatMap(_.split(",")) | |
.map(_.toInt) | |
.foldLeft(0)(useInt) // 21 | |
//-- | |
// surely we can do better than this. | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
str match { | |
case "NA" => soFar | |
case csvStrInt => csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
} | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
soFar + strInt.toInt | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useStringInt(soFar: Int, strInt: String): Int = | |
soFar + strInt.toInt | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(useStringInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
str match { | |
case "NA" => soFar | |
case csvStrInt => csvStrInt.split(",").foldLeft(soFar)(_ + _.toInt) | |
} | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(useStringInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
if (str != "NA") useCSVStringInt(soFar, str) else soFar | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(useStringInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
if (str != "NA") useCSVStringInt(soFar, str) else soFar | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(useStringInt) | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
def useInt(soFar: Int, elem: Int): Int = soFar + elem | |
//-- | |
// let's generalize | |
def useInt(soFar: R, elem: Int): R | |
//-- | |
def useStringInt(soFar: R, elem: String): R = ??? | |
def useInt(soFar: R, elem: Int): R | |
//-- | |
def useStringInt(soFar: R, elem: String): R = { | |
val f: String => Int = _.toInt | |
useInt(soFar, f(elem)) | |
} | |
def useInt(soFar: R, elem: Int): R | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
def useStringIntF: (Int,String) => Int = | |
map[String, Int, Int](_.toInt)(useInt(_,_)) | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
def useStringIntF: (Int,String) => Int = | |
map[String, Int, Int](_.toInt)(useInt(_,_)) | |
useStringIntF(1,"30") // 31 | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
class MappingTransducer[A,B](f: A => B) { | |
... | |
} | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
class MappingTransducer[A,B](f: A => B) { | |
def apply[R](useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
} | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
class MappingTransducer[A,B](f: A => B) { | |
def apply[R](useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
} | |
val mapStringToInt = new MappingTransducer[String, Int](_.toInt) | |
//-- | |
def map[A,B,R](f: A => B)(useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
class MappingTransducer[A,B](f: A => B) { | |
def apply[R](useB: (R,B) => R): (R,A) => R = | |
(r,a) => useB(r, f(a)) | |
} | |
val mapStringToInt = new MappingTransducer[String, Int](_.toInt) | |
def useStringIntF: (Int,String) => Int = | |
mapStringToInt.apply(useInt(_,_)) | |
useStringIntF(1,"30") // 31 | |
//-- | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
if (str != "NA") useCSVStringInt(soFar, str) else soFar | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int | |
//-- | |
def filter[A,R](f: A => Boolean)(useA: (R,A) => R): (R,A) => R = | |
(r,a) => if (f(a)) useA(r, a) else r | |
//-- | |
class FilteringTransducer[A](f: A => Boolean) { | |
def apply[R](useA: (R,A) => R): (R,A) => R = | |
(r,a) => if (f(a)) useA(r, a) else r | |
} | |
// alternative in approach 2: allocations!! | |
//-- | |
trait Transducer[A,B] { | |
def apply[R](useB: (R,B) => R): (R,A) => R | |
} | |
// give me a consumer of Bs | |
// I'll give you a consumer of As. | |
//-- | |
trait Transducer[A,B] { | |
def apply[R](useB: (R,B) => R): (R,A) => R | |
} | |
def useNullableCSVStringInt(soFar: Int, str: String): Int = | |
if (str != "NA") useCSVStringInt(soFar, str) else soFar | |
// ^^ Filtering Transducer | |
def useCSVStringInt(soFar: Int, csvStrInt: String): Int = | |
csvStrInt.split(",").foldLeft(soFar)(useStringInt) | |
// ^^ MapCatting Transducer | |
def useStringInt(soFar: Int, strInt: String): Int = | |
useInt(soFar, strInt.toInt) | |
// ^^ Map Transducer | |
def useInt(soFar: Int, elem: Int): toInt = soFar + elem | |
//-- | |
trait Transducer[A,B] { | |
def apply[R](useB: (R,B) => R): (R,A) => R | |
def andThen[C](next: Transducer[B,C]): Transducer[A,C] | |
// I can transform a consumer of Bs to a consumer of As | |
// you can transform a consumer of Cs to a consumer of Bs | |
// let's work together. | |
} | |
//-- | |
trait Transducer[A,B] { | |
def apply[R](useB: (R,B) => R): (R,A) => R | |
def andThen[C](next: Transducer[B,C]): Transducer[A,C] = | |
// I can transform a consumer of Bs to a consumer of As | |
// you can transform a consumer of Cs to a consumer of Bs | |
// let's work together. | |
new Transducer[A,C] { | |
def apply[R](step: (R,C) => R): (R,A) => R = { | |
val bStep = next.apply(step) | |
self.apply(bStep) | |
} | |
} | |
} | |
//-- | |
def map[A,B](f: A => B): Transducer[A,B] = | |
new Transducer[A,B] { | |
def apply[R](step: (R,B) => R): (R,A) => R = | |
(r,a) => step(r, f(a)) | |
} | |
//-- | |
def map[A,B](f: A => B): Transducer[A,B] = | |
new Transducer[A,B] { | |
def apply[R](step: (R,B) => R): (R,A) => R = | |
(r,a) => step(r, f(a)) | |
} | |
def filter[A](f: A => Boolean): Transducer[A,A] = | |
new Transducer[A, A] { | |
def apply[R](step: (R,A) => R): (R,A) => R = | |
(r,a) => if (f(a)) step(r,a) else r | |
} | |
//-- | |
def map[A,B](f: A => B): Transducer[A,B] = | |
new Transducer[A,B] { | |
def apply[R](step: (R,B) => R): (R,A) => R = | |
(r,a) => step(r, f(a)) | |
} | |
def filter[A](f: A => Boolean): Transducer[A,A] = | |
new Transducer[A, A] { | |
def apply[R](step: (R,A) => R): (R,A) => R = | |
(r,a) => if (f(a)) step(r,a) else r | |
} | |
def mapCat[A,B](f: A => TraversableOnce[B]): Transducer[A,B] = | |
new Transducer[A,B] { | |
def apply[R](step: (R,B) => R): (R,A) => R = | |
(r,a) => f(a).foldLeft(r)(step) | |
} | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val process: Transducer[String, Int] = | |
filter[String](_ != "NA") andThen | |
mapCat(_.split(",")) andThen | |
map(_.toInt) | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val process: Transducer[String, Int] = | |
filter[String](_ != "NA") andThen | |
mapCat(_.split(",")) andThen | |
map(_.toInt) | |
def sum(xs: List[String]): Int = | |
xs.foldLeft(0)(process.apply(_ + _)) | |
// sum(input) = 21 | |
//-- | |
val input = List("1,2", "3,4,5", "NA", "6", "NA") | |
val process: Transducer[String, Int] = | |
filter[String](_ != "NA") andThen | |
mapCat(_.split(",")) andThen | |
map(_.toInt) | |
def build(xs: List[String]): Vector[Int] = | |
xs.foldLeft(Vector.empty[Int])(process.apply(_ :+ _)) | |
// build(input) = Vector(1, 2, 3, 4, 5, 6) | |
//-- | |
// Clojure's transducers | |
// + | |
// composeable | |
// ultra generic | |
// no extra allocations | |
// - | |
// programming them is tough (contraMap) | |
//-- | |
// Clojure's transducers | |
// + | |
// composeable | |
// ultra generic | |
// no extra allocations | |
// - | |
// programming them is tough (contraMap) | |
// stateful processes are tricky to write and use. | |
//-- | |
val have = List(1,10,2,20,3,30,4) | |
val want = Vector(11, 22, 33) // pair consecutive and sum, drop unpaired end. | |
//-- | |
val have = List(1,10,2,20,3,30,4) | |
val want = Vector(11, 22, 33) // pair consecutive and sum, drop unpaired end. | |
def pair[A]: Transducer[A,(A,A)] = | |
new Transducer[A, (A,A)] { | |
def apply[R](step: (R,(A,A)) => R): (R,A) => R = ??? | |
} | |
//-- | |
def pair[A]: Transducer[A,(A,A)] = | |
new Transducer[A, (A,A)] { | |
def apply[R](step: (R,(A,A)) => R): (R,A) => R = { | |
var cache: Option[A] = None // say hello to my mutable state | |
(r,a) => cache match { | |
case Some(a1) => | |
cache = None | |
step(r, (a1, a)) | |
case None => | |
cache = Some(a) | |
r | |
} | |
} | |
} | |
//-- | |
val process = | |
pair[Int] andThen | |
map { xs => (xs._1 + xs._2) } | |
def run(xs: List[Int]): Vector[Int] = | |
xs.foldLeft(Vector[Int]())(process(_ :+ _)) | |
val have = List(1,10,2,20,3,33,4) | |
run(have) // Vector(11, 22, 36) | |
run(have) // Vector(11, 22, 36) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment