Last active
August 29, 2015 13:59
-
-
Save zach-klippenstein/10450985 to your computer and use it in GitHub Desktop.
Playing around with some different implementations of a clumping algorithm in Scala.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is my first attempt at implementing clumping – once with folding, | |
once with iterators. Clumping is what I've called grouping elements by | |
an arbitrary function on them, much like groupBy, but preserving their order. | |
e.g. [a,a,b,a,a,a].clump == [[a,a],[b],[a,a,a]] | |
I'm just getting into functional programming, so this might have a different name. | |
While the folding method is more "functionally" and concise, I'm not certain | |
it provides the same laziness as the iterator one. | |
I'm not sure exactly what calling view on an iterable does. I imagine it's unnecessary, | |
and foldLeft will be lazy if the source iterable is lazy (a stream, view, or iterator). | |
I have not verified this however. | |
I'm not convinced the latter is that much more reasonable though. | |
As long as span() doesn't evaluate the second term of the returned tuple, | |
it is much more obvious that it is lazy. | |
*/ | |
class FoldingClumpingIterable[T](src: Iterable[T]) { | |
def clump[K](k: (T) => K): Iterable[List[T]] = src.view.foldLeft(List[List[T]]()) { | |
(clumps: List[List[T]], e: T) => clumps match { | |
// Non-initial clump | |
case init :+ prevClump => prevClump match { | |
// Current item belongs in current clump | |
case s :: _ if k(s) == k(e) => init :+ (prevClump :+ e) | |
// Current item belongs in new clump | |
case _ => clumps :+ List(e) | |
} | |
// Initial clump | |
case _ => List(List(e)) | |
} | |
} | |
} | |
class IteratorClumpingIterable[T](src: Iterable[T]) { | |
def clump[K](k: (T) => K): Iterable[List[T]] = new Iterable[List[T]] { | |
override def iterator = new Iterator[List[T]] { | |
var it = src.iterator.buffered | |
// Think of this like a while loop in a Pythonic generator | |
override def next(): List[T] = { | |
val clumpKey = k(it.head) | |
// Span partitions by (p == true, p == false) | |
val (clump, newIt) = it.span(e => k(e) == clumpKey) | |
it = newIt.buffered | |
clump.toList | |
} | |
override def hasNext: Boolean = it.hasNext | |
} | |
} | |
} | |
val testList = List(1,1,2,2,3,3,4,4,4,5,5,5) | |
val clumpedList = List(List(1, 1), List(2, 2), List(3, 3), List(4, 4, 4), List(5, 5, 5)) | |
val foldTestList = new FoldingClumpingIterable(testList) | |
foldTestList.clump(e => e) | |
assert(foldTestList.clump(e => e).toList == clumpedList) | |
foldTestList.clump(e => e).take(3).toList | |
val itrTestList = new IteratorClumpingIterable(testList) | |
itrTestList.clump(e => e) | |
assert(itrTestList.clump(e => e).toList == clumpedList) | |
itrTestList.clump(e => e).take(3).toList | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment