Created
April 7, 2016 16:35
-
-
Save tyrcho/9ff10b5a70dd9ed8a1780ad26ff6fede to your computer and use it in GitHub Desktop.
Filter an iterator, keeping only the first element seen for each distinct value of a key (transformation) function.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.AbstractIterator
import scala.collection.Iterator
import scala.collection.mutable
/** Small command-line demo plus the reusable `uniqueBy` iterator filter.
  *
  * An explicit `main` is used instead of `extends App` so that merely
  * referencing `UniqueBy.uniqueBy` never triggers the file I/O of the demo.
  */
object UniqueBy {

  /** Prints every line of `somefile` whose first non-blank character has not
    * already started an earlier line (blank lines share a single key).
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val source = scala.io.Source.fromFile("""somefile""")
    // Close the file handle even if reading or printing fails.
    try uniqueBy(source.getLines())(_.trim.headOption).foreach(println)
    finally source.close()
  }

  /** Lazily filters `it`, keeping only the first element for each distinct key.
    *
    * Elements are emitted in their original order. The underlying iterator is
    * advanced only as far as needed to answer `hasNext` / `next()`, so this is
    * safe to use on unbounded iterators as long as the caller stops pulling.
    *
    * @param it   source iterator; it is consumed by the returned iterator
    * @param pred key function, evaluated exactly once per consumed element
    * @tparam T   element type
    * @tparam U   key type; must have meaningful `equals`/`hashCode`
    * @return an iterator over the elements of `it` whose key has not been seen before
    */
  def uniqueBy[T, U](it: Iterator[T])(pred: T => U): Iterator[T] = new AbstractIterator[T] {
    // Keys of every element emitted (or buffered for emission) so far.
    private val seen = mutable.HashSet.empty[U]
    // The next element to emit, once it has been located in `it`.
    private var pending: Option[T] = None

    /** Pulls from `it` until an element with an unseen key is buffered or `it` is exhausted. */
    private def advance(): Unit =
      while (pending.isEmpty && it.hasNext) {
        val candidate = it.next()
        // `add` returns true only when the key was not present yet.
        if (seen.add(pred(candidate))) pending = Some(candidate)
      }

    def hasNext: Boolean = {
      advance()
      pending.nonEmpty
    }

    def next(): T = {
      advance()
      pending match {
        case Some(element) =>
          pending = None
          element
        case None =>
          throw new NoSuchElementException("next on empty iterator")
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment