Created
January 31, 2014 02:25
-
-
Save UberMouse/8725600 to your computer and use it in GitHub Desktop.
Leaks memory like a sieve
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.{File, PrintWriter}

import scala.annotation.tailrec
import scala.io.Source
/**
 * Created by wyntl1 on 29/01/14.
 */
object Main extends App {
  // Command-line tool that re-chunks a directory of CSV files into fixed-size sets.
  //
  // Usage: Main <csvDir> <outputDir> <sets> <setSize>
  //
  // The data rows (every line after the first) of each `.csv` file in <csvDir> are read
  // in file-name order and written out in groups of exactly <setSize> rows, up to <sets>
  // groups. Group 0 goes to `<outputDir>/Initial/out.csv`, group N (N > 0) goes to
  // `<outputDir>/Increment N/out.csv`. Every output file starts with the header line of
  // the first input file. Trailing rows that do not fill a whole set are not written.

  if (args.length != 4) {
    val arguments = Array("location of csv files: string", "output directory: string", "sets: int", "set size (lines): int")
    val example = "C:\\csvs C:\\csvs\\out 5 10000"
    val messageBody = s"Application takes four arguments: ${arguments.mkString(",")} Too few or too many arguments passed"
    throw new Error(messageBody + "\n" + example)
  }

  val LINE_SEPARATOR: String = System.getProperty("line.separator")

  val csvDir: File = new File(args(0))
  val outputDir: File = new File(args(1))
  val totalSets: Int = intArgument(2, "sets")
  val setSize: Int = intArgument(3, "set size")

  // `require` instead of `assert`: assertions can be elided at compile time, user-input
  // validation must not be. The input side is validated before anything is created on disk.
  require(csvDir.isDirectory, "Location of CSV files is not a directory")
  require(totalSets > 0, "sets must be at least 1")
  require(setSize > 0, "set size must be at least 1")
  if (!outputDir.exists()) outputDir.mkdirs()
  require(outputDir.isDirectory, "Output location is not a directory")

  // `listFiles` may return null on an I/O error and gives no ordering guarantee, so the
  // result is wrapped in Option and sorted by name to make the output reproducible.
  val csvFiles: Array[File] = Option(csvDir.listFiles())
    .getOrElse(Array.empty[File])
    .filter(f => f.isFile && f.getName.endsWith(".csv"))
    .sortBy(_.getName)
  require(csvFiles.nonEmpty, s"No .csv files found in ${csvDir.getPath}")

  // Header line of the first CSV file; it is written at the top of every output file.
  // All input files are assumed to share this header.
  val csvHeader: String = {
    val src = Source.fromFile(csvFiles.head)
    try {
      val lines = src.getLines()
      if (lines.hasNext) lines.next()
      else throw new IllegalArgumentException(s"${csvFiles.head.getPath} is empty; cannot read the CSV header")
    } finally src.close()
  }

  readWriteCsvFiles(csvFiles)

  /** Parses the command-line argument at `position` as an Int, naming it `label` in the error. */
  private def intArgument(position: Int, label: String): Int =
    try args(position).toInt
    catch {
      case _: NumberFormatException =>
        throw new IllegalArgumentException(s"$label must be an integer but was '${args(position)}'")
    }

  /**
   * Streams the data rows of `csvFiles`, in the given order, into sets of `setSize` rows and
   * writes each full set with [[writeCsvFile]]. Stops after `totalSets` sets or when the
   * files are exhausted. Rows left over at the end that do not fill a set are dropped.
   *
   * @param csvFiles input files, each starting with a header line
   */
  def readWriteCsvFiles(csvFiles: Array[File]): Unit = {
    // remaining: files still to read, head is the current one
    // carried:   rows already collected for the set being assembled
    // setIndex:  index of the set being assembled
    // nextLine:  0-based line of the current file to resume at (line 0 is the header)
    @tailrec
    def recur(remaining: List[File], carried: Array[String], setIndex: Int, nextLine: Int): Unit =
      remaining match {
        case file :: rest if setIndex < totalSets =>
          val (readLines, hasMore) = processCsvFile(file, carried, nextLine)
          if (readLines.length > setSize)
            throw new Error("Loaded lines is somehow greater than set size")
          else if (readLines.length == setSize) {
            writeCsvFile(readLines, setIndex)
            // Only the rows read from *this* file advance the resume position.
            val consumed = readLines.length - carried.length
            if (hasMore) recur(remaining, Array.empty[String], setIndex + 1, nextLine + consumed)
            else recur(rest, Array.empty[String], setIndex + 1, 1)
          } else {
            // File exhausted before the set was full: carry the rows into the next file.
            recur(rest, readLines, setIndex, 1)
          }
        case _ => ()
      }

    recur(csvFiles.toList, Array.empty[String], 0, 1)
  }

  /**
   * Writes one set to `<outputDir>/<folder>/out.csv`, where `<folder>` is `Initial` for
   * index 0 and `Increment <index>` otherwise. The file is created or truncated and
   * starts with `csvHeader`.
   *
   * @param lines data rows of the set, written one per line
   * @param index zero-based set index
   * @throws java.io.IOException if the writer reports an error while writing
   */
  def writeCsvFile(lines: Array[String], index: Int): Unit = {
    val folder = if (index == 0) "Initial" else s"Increment $index"
    val fullPath = new File(outputDir, folder)
    fullPath.mkdirs()
    val fullLocation = new File(fullPath, "out.csv")
    // PrintWriter(File) creates the file or truncates an existing one.
    val writer = new PrintWriter(fullLocation)
    try {
      writer.write(csvHeader + LINE_SEPARATOR)
      lines.foreach(l => writer.write(l + LINE_SEPARATOR))
      // PrintWriter never throws on write; checkError flushes and reports failures.
      if (writer.checkError()) throw new java.io.IOException(s"Failed to write ${fullLocation.getPath}")
    } finally writer.close()
  }

  /**
   * Reads rows from `f` until `prevLoadedLines` plus the new rows reach `setSize` rows or
   * the file ends.
   *
   * @param f               CSV file to read
   * @param prevLoadedLines rows already collected for the current set
   * @param startIndex      0-based line to start reading at; 0 is treated as 1 so that the
   *                        header line is always skipped
   * @return `prevLoadedLines` followed by the newly read rows, and whether `f` still has
   *         unread lines after them
   */
  def processCsvFile(f: File, prevLoadedLines: Array[String] = Array[String](), startIndex: Int = 1): (Array[String], Boolean) = {
    val source = Source.fromFile(f)
    try {
      val lines = source.getLines().drop(if (startIndex == 0) 1 else startIndex)
      val wanted = setSize - prevLoadedLines.length
      val loaded = Array.newBuilder[String]
      // Pull rows with hasNext/next only: an iterator must not be reused after `take`.
      var taken = 0
      while (taken < wanted && lines.hasNext) {
        loaded += lines.next()
        taken += 1
      }
      (prevLoadedLines ++ loaded.result(), lines.hasNext)
    } finally source.close()
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment