Skip to content

Instantly share code, notes, and snippets.

@UberMouse
Created January 31, 2014 02:40
Show Gist options
  • Save UberMouse/8725744 to your computer and use it in GitHub Desktop.
Save UberMouse/8725744 to your computer and use it in GitHub Desktop.
Second version of CSV Splitter, better on memory but still uses too much.
import java.io.{PrintWriter, File}
import scala.io.Source
/**
 * Splits the data rows of every `.csv` file in a directory into fixed-size sets.
 *
 * Created by wyntl1 on 29/01/14.
 *
 * Command-line arguments (all four are required):
 *  1. location of the CSV files (must be a directory)
 *  2. output directory (created when it does not exist)
 *  3. sets: the maximum number of sets to write
 *  4. set size: the number of data rows in each set
 *
 * Input files are processed in file-name order. The first line of the first file is
 * used as the header of every output file; the first line of every input file is
 * skipped on the assumption that all inputs share that header.
 *
 * Sets are numbered in the order they are produced, independently of which input file
 * the rows came from: set 0 goes to `Initial/out.csv` and set `k` (k > 0) goes to
 * `Increment k/out.csv`, both below the output directory. Rows left over after the
 * last input file are written as a final, shorter set instead of being dropped.
 * Processing stops as soon as `sets` sets have been written.
 *
 * Only one set of rows is held in memory at a time.
 */
object Main {
  import scala.collection.mutable.ArrayBuffer

  /** Platform line separator appended to every line that is written. */
  val LINE_SEPARATOR = System.getProperty("line.separator")

  /**
   * Program entry point.
   *
   * An explicit `main` is used instead of the `App` trait so that `args` is available
   * regardless of how the object is initialised.
   *
   * @param args the four command-line arguments described on [[Main]]
   * @throws java.lang.Error when the number of arguments is not four
   * @throws java.lang.NumberFormatException when `sets` or `set size` is not an integer
   * @throws java.lang.AssertionError when a directory or numeric argument is invalid,
   *                                  or when there is no CSV header to read
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      val arguments = Array("location of csv files: string", "output directory: string", "sets: int", "set size (lines): int")
      val example = "C:\\csvs C:\\csvs\\out 5 10000"
      val messageBody = s"Application takes four arguments: ${arguments.mkString(",")} Too few or too many arguments passed"
      val errorMsg = messageBody + "\n" + example
      throw new Error(errorMsg)
    }

    val csvDir = new File(args(0))
    val outputDir = new File(args(1))
    val totalSets = args(2).toInt
    val setSize = args(3).toInt

    if (!outputDir.exists()) outputDir.mkdirs()
    assert(csvDir.isDirectory, "Location of CSV files is not a directory")
    assert(outputDir.isDirectory, "Output location is not a directory")
    assert(totalSets > 0, "sets must be at least 1")
    assert(setSize > 0, "set size must be at least 1")

    // listFiles() returns null on an I/O error and gives no ordering guarantee, so
    // guard against null and sort by name to make the set contents reproducible.
    val csvFiles = Option(csvDir.listFiles())
      .getOrElse(Array.empty[File])
      .filter(f => f.isFile && f.getName.endsWith(".csv"))
      .sortBy(_.getName)
    assert(csvFiles.nonEmpty, "Location of CSV files contains no .csv files")

    val csvHeader = readHeader(csvFiles.head)

    // Rows collected for the set currently being filled; never grows beyond setSize.
    val pending = new ArrayBuffer[String]()
    var setsWritten = 0

    def flushPending(): Unit = {
      writeCsvFile(outputDir, csvHeader, pending, setsWritten)
      pending.clear()
      setsWritten += 1
    }

    val remainingFiles = csvFiles.iterator
    while (setsWritten < totalSets && remainingFiles.hasNext) {
      withDataLines(remainingFiles.next()) { rows =>
        while (setsWritten < totalSets && rows.hasNext) {
          pending += rows.next()
          if (pending.length == setSize) flushPending()
        }
      }
    }

    // Rows left after the last file form a final, shorter set.
    if (pending.nonEmpty && setsWritten < totalSets) flushPending()
  }

  /**
   * Loads every data row of a CSV file into memory.
   *
   * @param f the CSV file to read
   * @return all lines of `f` except the first (header) line
   */
  def loadCsvFile(f: File): Array[String] = withDataLines(f)(_.toArray)

  /**
   * Reads the first line of a CSV file.
   *
   * @param f the CSV file to read
   * @return the first line of `f`
   * @throws java.lang.AssertionError when `f` contains no lines
   */
  private def readHeader(f: File): String = {
    val src = Source.fromFile(f.getAbsolutePath)
    try {
      val fileLines = src.getLines()
      assert(fileLines.hasNext, s"Cannot read a CSV header from empty file ${f.getName}")
      fileLines.next()
    } finally {
      src.close()
    }
  }

  /**
   * Opens a CSV file, hands its data rows to `body` and closes the file afterwards,
   * even when `body` throws or stops reading early.
   *
   * @param f    the CSV file to read
   * @param body consumer of the lazily read rows; the iterator must not be used after
   *             `body` returns because the file is closed at that point
   * @return whatever `body` returns
   */
  private def withDataLines[A](f: File)(body: Iterator[String] => A): A = {
    val src = Source.fromFile(f.getAbsolutePath)
    try {
      //drop(1) skips CSV header, captured previously. Assumes all headers are same
      body(src.getLines().drop(1))
    } finally {
      src.close()
    }
  }

  /**
   * Writes one set to `out.csv` inside the folder belonging to the set index,
   * replacing any file that is already there.
   *
   * @param outputDir root output directory
   * @param csvHeader header line written before the rows
   * @param lines     data rows of the set
   * @param index     zero-based set number; 0 maps to folder "Initial",
   *                  any other value to "Increment index"
   * @throws java.io.IOException when the rows could not be written completely
   */
  private def writeCsvFile(outputDir: File, csvHeader: String, lines: Iterable[String], index: Int): Unit = {
    val folder = if (index == 0) "Initial" else s"Increment $index"
    val fullPath = new File(outputDir, folder)
    val fullLocation = new File(fullPath, "out.csv")
    fullPath.mkdirs()

    // PrintWriter(File) truncates an existing file, so no delete/createNewFile is needed.
    val writer = new PrintWriter(fullLocation)
    try {
      writer.write(csvHeader + LINE_SEPARATOR)
      for (l <- lines)
        writer.write(l + LINE_SEPARATOR)
      // PrintWriter never throws on write errors; checkError() flushes and reports them.
      if (writer.checkError())
        throw new java.io.IOException(s"Failed to write ${fullLocation.getAbsolutePath}")
    } finally {
      writer.close()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment