Skip to content

Instantly share code, notes, and snippets.

@codahale
Forked from dannylagrouw/ReadNG27-fold.scala
Created March 16, 2010 06:53
Show Gist options
  • Save codahale/333712 to your computer and use it in GitHub Desktop.
Save codahale/333712 to your computer and use it in GitHub Desktop.
// Scala 2.7.7, immutable map, uses foldLeft to loop over files/words
import java.io._
import scala.io._
/** Evaluates `f` once and returns how long it took, in seconds.
  *
  * Elapsed time is taken from `System.nanoTime`, which is meant for measuring
  * intervals, rather than from the wall clock, which can jump when the system
  * time is adjusted.
  *
  * @param f the by-name block to run and time
  * @return the elapsed time in seconds
  */
def time(f: => Unit): Double = {
  val start = System.nanoTime
  f
  (System.nanoTime - start) / 1e9
}
/** Counts word occurrences in every entry found directly inside each immediate
  * sub-directory of `rootDir` and writes two tab-separated reports, using
  * relative paths: `counts-descreasing-scala.txt` (highest count first; the
  * spelling is the existing file name and is kept as is) and
  * `counts-alphabetical-scala.txt` (words in ascending order).
  *
  * Each file is read as ISO-8859-1, lower-cased and split on runs of non-word
  * characters. The empty token that the split produces for an empty file, or
  * for text starting with a non-word character, is not counted as a word.
  *
  * @param rootDir directory whose immediate sub-directories hold the files to read
  */
def processNewsgroups(rootDir: File): Unit = {
  /** Applies `body` to `resource` and closes the resource afterwards, even if `body` throws. */
  def using[R <: Closeable, B](resource: R)(body: R => B): B =
    try {
      body(resource)
    } finally {
      resource.close()
    }

  /** Writes one "word<TAB>count" line per map entry to `file`, ordered by `sort`. */
  def write(map: Map[String, Int], file: String)(sort: (Tuple2[String, Int], Tuple2[String, Int]) => Boolean): Unit = {
    using(new PrintWriter(new FileWriter(file))) { out =>
      map.toList.sort(sort).foreach { case (word, count) => out.println(word + "\t" + count) }
    }
  }

  // Reads the whole file as ISO-8859-1 text, joining its lines with "\n".
  // NOTE(review): the Source is not closed here - confirm whether the Scala 2.7.7
  // Source API offers a close method and call it if so.
  def file2String(file: File): String = Source.fromFile(file, "ISO-8859-1").getLines.mkString("\n")

  val files = rootDir.listFiles.filter(_.isDirectory).flatMap(_.listFiles)
  val words = files.flatMap(file => file2String(file).toLowerCase.split("""\W+"""))

  // Fold every token into an immutable map, skipping the empty token.
  val counts = words.foldLeft(Map.empty[String, Int]) { (acc, word) =>
    if (word.length == 0) acc
    else acc + (word -> (acc.getOrElse(word, 0) + 1))
  }

  write(counts, "counts-descreasing-scala.txt") {_._2 > _._2}
  write(counts, "counts-alphabetical-scala.txt") {_._1 < _._1}
}
// Run the word count over the ./20_newsgroups directory inside `time`;
// the value `time` returns is not captured by this statement.
time {
  processNewsgroups(new File("./20_newsgroups"))
}
// Scala 2.7.7, mutable map, loops over files/words with foreach
import java.io._
import scala.io._
import scala.collection.mutable._
/** Evaluates `f` once and returns how long it took, in seconds.
  *
  * Elapsed time is taken from `System.nanoTime`, which is meant for measuring
  * intervals, rather than from the wall clock, which can jump when the system
  * time is adjusted.
  *
  * @param f the by-name block to run and time
  * @return the elapsed time in seconds
  */
def time(f: => Unit): Double = {
  val start = System.nanoTime
  f
  (System.nanoTime - start) / 1e9
}
/** Counts word occurrences in every entry found directly inside each immediate
  * sub-directory of `rootDir` and writes two tab-separated reports, using
  * relative paths: `counts-descreasing-scala.txt` (highest count first; the
  * spelling is the existing file name and is kept as is) and
  * `counts-alphabetical-scala.txt` (words in ascending order).
  *
  * Each file is read as ISO-8859-1, lower-cased and split on runs of non-word
  * characters. The empty token that the split produces for an empty file, or
  * for text starting with a non-word character, is not counted as a word.
  * Counts are accumulated in a mutable map.
  *
  * @param rootDir directory whose immediate sub-directories hold the files to read
  */
def processNewsgroups(rootDir: File): Unit = {
  /** Applies `body` to `resource` and closes the resource afterwards, even if `body` throws. */
  def using[R <: Closeable, B](resource: R)(body: R => B): B =
    try {
      body(resource)
    } finally {
      resource.close()
    }

  /** Writes one "word<TAB>count" line per map entry to `file`, ordered by `sort`. */
  def write(map: Map[String, Int], file: String)(sort: (Tuple2[String, Int], Tuple2[String, Int]) => Boolean): Unit = {
    using(new PrintWriter(new FileWriter(file))) { out =>
      map.toList.sort(sort).foreach { case (word, count) => out.println(word + "\t" + count) }
    }
  }

  // Reads the whole file as ISO-8859-1 text, joining its lines with "\n".
  // NOTE(review): the Source is not closed here - confirm whether the Scala 2.7.7
  // Source API offers a close method and call it if so.
  def file2String(file: File): String = Source.fromFile(file, "ISO-8859-1").getLines.mkString("\n")

  val counts = Map.empty[String, Int]
  val files = rootDir.listFiles.filter(_.isDirectory).flatMap(_.listFiles)
  files.foreach { file =>
    file2String(file).toLowerCase.split("""\W+""").foreach { word =>
      // Skip the empty token; getOrElse avoids a separate contains + apply lookup.
      if (word.length > 0) counts(word) = counts.getOrElse(word, 0) + 1
    }
  }

  write(counts, "counts-descreasing-scala.txt") {_._2 > _._2}
  write(counts, "counts-alphabetical-scala.txt") {_._1 < _._1}
}
// Run the word count over the ./20_newsgroups directory inside `time`;
// the value `time` returns is not captured by this statement.
time {
  processNewsgroups(new File("./20_newsgroups"))
}
// Scala 2.8, immutable map, uses foldLeft to loop over files/words
import java.io._
import scala.io._
/** Evaluates `f` once and returns how long it took, in seconds.
  *
  * Elapsed time is taken from `System.nanoTime`, which is meant for measuring
  * intervals, rather than from the wall clock, which can jump when the system
  * time is adjusted.
  *
  * @param f the by-name block to run and time
  * @return the elapsed time in seconds
  */
def time(f: => Unit): Double = {
  val start = System.nanoTime
  f
  (System.nanoTime - start) / 1e9
}
/** Counts word occurrences in every entry found directly inside each immediate
  * sub-directory of `rootDir` and writes two tab-separated reports, using
  * relative paths: `counts-descreasing-scala.txt` (highest count first; the
  * spelling is the existing file name and is kept as is) and
  * `counts-alphabetical-scala.txt` (words in ascending order).
  *
  * Each file is read as ISO-8859-1, lower-cased and split on runs of non-word
  * characters. The empty token that the split produces for an empty file, or
  * for text starting with a non-word character, is not counted as a word.
  *
  * @param rootDir directory whose immediate sub-directories hold the files to read
  */
def processNewsgroups(rootDir: File): Unit = {
  /** Applies `body` to `resource` and closes the resource afterwards, even if `body` throws. */
  def using[R <: Closeable, B](resource: R)(body: R => B): B =
    try {
      body(resource)
    } finally {
      resource.close()
    }

  /** Writes one "word<TAB>count" line per map entry to `file`, ordered by `sort`. */
  def write(map: Map[String, Int], file: String)(sort: (Tuple2[String, Int], Tuple2[String, Int]) => Boolean): Unit = {
    using(new PrintWriter(new FileWriter(file))) { out =>
      map.toList.sortWith(sort).foreach { case (word, count) => out.println(word + "\t" + count) }
    }
  }

  /** Reads the whole file as ISO-8859-1 text, joining its lines with "\n". */
  def file2String(file: File): String = {
    val source = Source.fromFile(file, 32768)(Codec.ISO8859)
    // Close the source so that one file handle is not left open per input file.
    try source.getLines().mkString("\n") finally source.close()
  }

  val files = rootDir.listFiles.filter(_.isDirectory).flatMap(_.listFiles)
  val words = files.flatMap(file => file2String(file).toLowerCase.split("""\W+"""))

  // Fold every token into an immutable map, skipping the empty token.
  val counts = words.foldLeft(Map.empty[String, Int]) { (acc, word) =>
    if (word.length == 0) acc
    else acc + (word -> (acc.getOrElse(word, 0) + 1))
  }

  write(counts, "counts-descreasing-scala.txt") {_._2 > _._2}
  write(counts, "counts-alphabetical-scala.txt") {_._1 < _._1}
}
// Run the word count over the ./20_newsgroups directory inside `time`;
// the value `time` returns is not captured by this statement.
time {
  processNewsgroups(new File("./20_newsgroups"))
}
// Scala 2.8, mutable map, loops over files/words with foreach
import java.io._
import scala.io._
import scala.collection.mutable._
/** Evaluates `f` once and returns how long it took, in seconds.
  *
  * Elapsed time is taken from `System.nanoTime`, which is meant for measuring
  * intervals, rather than from the wall clock, which can jump when the system
  * time is adjusted.
  *
  * @param f the by-name block to run and time
  * @return the elapsed time in seconds
  */
def time(f: => Unit): Double = {
  val start = System.nanoTime
  f
  (System.nanoTime - start) / 1e9
}
/** Counts word occurrences in every entry found directly inside each immediate
  * sub-directory of `rootDir` and writes two tab-separated reports, using
  * relative paths: `counts-descreasing-scala.txt` (highest count first; the
  * spelling is the existing file name and is kept as is) and
  * `counts-alphabetical-scala.txt` (words in ascending order).
  *
  * Each file is read as ISO-8859-1, lower-cased and split on runs of non-word
  * characters. The empty token that the split produces for an empty file, or
  * for text starting with a non-word character, is not counted as a word.
  * Counts are accumulated in a mutable map.
  *
  * @param rootDir directory whose immediate sub-directories hold the files to read
  */
def processNewsgroups(rootDir: File): Unit = {
  /** Applies `body` to `resource` and closes the resource afterwards, even if `body` throws. */
  def using[R <: Closeable, B](resource: R)(body: R => B): B =
    try {
      body(resource)
    } finally {
      resource.close()
    }

  /** Writes one "word<TAB>count" line per map entry to `file`, ordered by `sort`. */
  def write(map: Map[String, Int], file: String)(sort: (Tuple2[String, Int], Tuple2[String, Int]) => Boolean): Unit = {
    using(new PrintWriter(new FileWriter(file))) { out =>
      map.toList.sortWith(sort).foreach { case (word, count) => out.println(word + "\t" + count) }
    }
  }

  /** Reads the whole file as ISO-8859-1 text, joining its lines with "\n". */
  def file2String(file: File): String = {
    val source = Source.fromFile(file)(Codec.ISO8859)
    // Close the source so that one file handle is not left open per input file.
    try source.getLines().mkString("\n") finally source.close()
  }

  val counts = Map.empty[String, Int]
  val files = rootDir.listFiles.filter(_.isDirectory).flatMap(_.listFiles)
  files.foreach { file =>
    file2String(file).toLowerCase.split("""\W+""").foreach { word =>
      // Skip the empty token; getOrElse avoids a separate contains + apply lookup.
      if (word.length > 0) counts(word) = counts.getOrElse(word, 0) + 1
    }
  }

  write(counts, "counts-descreasing-scala.txt") {_._2 > _._2}
  write(counts, "counts-alphabetical-scala.txt") {_._1 < _._1}
}
// Run the word count over the ./20_newsgroups directory inside `time`;
// the value `time` returns is not captured by this statement.
time {
  processNewsgroups(new File("./20_newsgroups"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment