// Last active April 22, 2016 18:11
// Asynchronous and streaming file word counter
import java.nio.file.{ Files, Path, Paths }
import resource._
import scala.collection.JavaConversions.iterableAsScalaIterable
import scala.concurrent.duration.Duration
import scala.concurrent.{ Await, Future }
object Main {
/**
 * An asynchronous function (inspired by Finagle): a plain function from `A` to a
 * `Future[B]`.
 *
 * The two-parameter alias can be written infix, e.g. `String Async Traversable[Path]`.
 *
 * @tparam A the input type
 * @tparam B the type of the value the returned future completes with
 */
type Async[A, B] = A => Future[B]
/**
 * Asynchronously lists the paths found in the directory named `dirName`.
 *
 * The directory is opened with `Files.newDirectoryStream` inside a `Future`, and the
 * resulting stream is wrapped in `managed(...)` before being mapped over.
 *
 * NOTE(review): the body of the `map { ... }` below and the closing braces of this
 * definition are not visible in this excerpt — confirm against the complete file.
 * NOTE(review): `Future { ... }` needs an implicit `ExecutionContext`; none is imported
 * in the visible import lines — confirm where it comes from.
 */
val listFiles: String Async Traversable[Path] = dirName =>
Future {
val pathsResource =
// `managed` presumably comes from `resource._` (scala-arm) and closes the stream
// once the mapped computation has run — TODO confirm.
managed(Files newDirectoryStream (Paths get dirName)) map {
/**
 * Counts the words in a single line of text.
 *
 * A word is a maximal run of non-whitespace characters, so an empty or blank line
 * counts as zero words, and leading, trailing or repeated whitespace (spaces or tabs)
 * does not inflate the result.
 *
 * @param line the line to analyse
 * @return the number of words in `line`
 */
def lineWordCount(line: String): Int =
  // Splitting on "\\s+" can still yield one leading empty token (and yields a single
  // empty token for the empty string), hence the non-empty filter.
  line.split("\\s+").count(_.nonEmpty)
/**
 * Asynchronously counts the words in the file at `path`.
 *
 * Inside a `Future`, the file is opened with `io.Source.fromFile`, its lines are passed
 * through `lineWordCount`, and the per-line counts are summed. The time taken is
 * measured with `System.nanoTime` and formatted into a message together with the file
 * name and the word count.
 *
 * NOTE(review): the message on the last visible line ends with a `)` that has no visible
 * opening call, and neither the value the `Future` yields nor the closing braces are
 * visible in this excerpt — confirm against the complete file.
 */
val fileWordsCount: Path Async Int = path =>
Future {
val fileName = path.toString
// Start of the timed section.
val startTime = System.nanoTime
val fileLines =
// The source is wrapped in `managed(...)`; presumably (scala-arm) it is closed after
// the lines have been consumed — TODO confirm.
managed(io.Source fromFile fileName) map (_.getLines.toIterable)
// Total for the file is the sum of the per-line word counts.
val wordsCount = (fileLines.toTraversable map lineWordCount).sum
// Nanoseconds converted to milliseconds.
val elapsedTime = (System.nanoTime - startTime).toDouble / 1000000
s"File: $fileName, word count: $wordsCount, elapsed time: $elapsedTime ms")
/**
 * Runs the word count app.
 *
 * {{{
 * sbt> run DIRNAME
 * }}}
 */
// Entry point: lists the files under the directory given as the first command-line
// argument, counts the words of each file, and prints the sum of all counts.
//
// NOTE(review): `args(0)` is read without checking that an argument was supplied.
// NOTE(review): the closing braces of the `foldRight` block, the `for` and this method
// are not visible in this excerpt — confirm against the complete file.
def main(args: Array[String]): Unit = {
val wordsCountFuture =
for {
// args(0) is the directory name passed to `listFiles`.
files <- listFiles(args(0))
filesWordsCount <-
// Apply `fileWordsCount` to every path (one Future[Int] per file), then fold the
// futures together, starting from an already-completed 0.
(files map fileWordsCount).foldRight(Future successful 0) {
// Combine two futures by adding the values they complete with.
(a, b) => for { aVal <- a; bVal <- b } yield aVal + bVal
} yield filesWordsCount
// Block without a timeout until the total is available, then print it.
println(Await.result(wordsCountFuture, Duration.Inf))
