Created
January 31, 2014 02:40
-
-
Save UberMouse/8725744 to your computer and use it in GitHub Desktop.
Second version of CSV Splitter, better on memory but still uses too much.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.{File, IOException, PrintWriter}

import scala.io.Source
/**
 * Created by wyntl1 on 29/01/14.
 */
object Main extends App {
  // Command-line CSV splitter.
  //
  // Reads the data rows (everything after the first line) of every `.csv` file
  // in the input directory, concatenates them in file-name order and re-emits
  // them as consecutive "sets" of `setSize` rows. Set 0 is written to
  // `<output>/Initial/out.csv`, set N (N >= 1) to `<output>/Increment N/out.csv`.
  // Every output file starts with the header line taken from the first input file.
  //
  // Arguments: <csv directory> <output directory> <sets> <set size (lines)>
  //
  // At most `sets` sets are written; the last one may be shorter than `setSize`
  // when the input runs out of rows.

  if (args.length != 4) {
    val arguments = Array("location of csv files: string", "output directory: string", "sets: int", "set size (lines): int")
    val example = "C:\\csvs C:\\csvs\\out 5 10000"
    val messageBody = s"Application takes four arguments: ${arguments.mkString(",")} Too few or too many arguments passed"
    val errorMsg = messageBody + "\n" + example
    throw new Error(errorMsg)
  }

  val LINE_SEPARATOR: String = System.getProperty("line.separator")
  val csvDir = new File(args(0))
  val outputDir = new File(args(1))
  val totalSets: Int = args(2).toInt
  val setSize: Int = args(3).toInt

  if (!outputDir.exists()) outputDir.mkdirs()

  assert(csvDir.isDirectory, "Location of CSV files is not a directory")
  assert(outputDir.isDirectory, "Output location is not a directory")
  assert(totalSets > 0, "sets must be at least 1")
  assert(setSize > 0, "set size must be at least 1")

  // File.listFiles() returns null on an I/O error and makes no ordering
  // guarantee, so guard against null and sort by name to keep the contents of
  // each set reproducible between runs.
  val csvFiles: Array[File] = Option(csvDir.listFiles())
    .getOrElse(Array.empty[File])
    .filter(f => f.isFile && f.getName.endsWith(".csv"))
    .sortBy(_.getName)
  assert(csvFiles.nonEmpty, "No .csv files found in " + csvDir.getPath)

  // Header line of the first CSV file; it is reused for every output file.
  val csvHeader: String = {
    val src = Source.fromFile(csvFiles.head.getAbsolutePath)
    try {
      val firstLines = src.getLines()
      assert(firstLines.hasNext, csvFiles.head.getPath + " is empty, cannot read the CSV header")
      firstLines.next()
    }
    finally {
      src.close()
    }
  }

  // Rows read so far that have not been written to a set yet.
  var lines: Array[String] = Array.empty[String]
  // Number of sets written so far; doubles as the index of the next set.
  private var setsWritten = 0

  private val remainingFiles = csvFiles.iterator
  while (setsWritten < totalSets && remainingFiles.hasNext) {
    lines ++= loadCsvFile(remainingFiles.next())

    // Emit every complete set currently buffered. Slicing by offset and
    // trimming once afterwards avoids re-copying the buffer for each set.
    var offset = 0
    while (setsWritten < totalSets && lines.length - offset >= setSize) {
      writeCsvFile(lines.slice(offset, offset + setSize), setsWritten)
      offset += setSize
      setsWritten += 1
    }
    lines = lines.drop(offset)
  }

  // Rows left over after the last file form a final, shorter set, provided the
  // requested number of sets has not already been produced.
  if (setsWritten < totalSets && lines.nonEmpty) {
    writeCsvFile(lines, setsWritten)
    setsWritten += 1
    lines = Array.empty[String]
  }

  /**
   * Writes one set to `out.csv` inside the folder belonging to `index`,
   * replacing any file already there.
   *
   * @param lines data rows of the set, written after the shared CSV header
   * @param index set number; 0 maps to the folder "Initial", any other value
   *              to "Increment <index>"
   * @throws IOException if the underlying writer reported an error
   */
  def writeCsvFile(lines: Array[String], index: Int): Unit = {
    val folder = if (index == 0) "Initial" else s"Increment $index"
    val fullPath = new File(outputDir, folder)
    val fullLocation = new File(fullPath, "out.csv")
    fullPath.mkdirs()

    // new PrintWriter(File) creates the file or truncates an existing one, so
    // no explicit delete()/createNewFile() is needed beforehand.
    val writer = new PrintWriter(fullLocation)
    try {
      writer.write(csvHeader + LINE_SEPARATOR)
      for (l <- lines)
        writer.write(l + LINE_SEPARATOR)
      // PrintWriter never throws on write failures; it only records them.
      if (writer.checkError())
        throw new IOException("Failed writing " + fullLocation.getPath)
    }
    finally {
      writer.close()
    }
  }

  /**
   * Loads all data rows of a CSV file.
   *
   * @param f CSV file to read
   * @return every line of `f` except the first one
   */
  def loadCsvFile(f: File): Array[String] = {
    val file = Source.fromFile(f.getAbsolutePath)
    try {
      // drop(1) skips the CSV header, captured previously. Assumes all headers are the same.
      file.getLines().drop(1).toArray
    }
    finally {
      file.close()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment