Last active
November 27, 2023 11:41
Revisions
-
RealNeGate revised this gist
Nov 27, 2023 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -60,8 +60,7 @@ void parse(byte data[]) { } else if (c == ',') { parsed.put(i); } else if (c == '"') { while (data[i++] != '"') {} } } while (i < data.length); } -
RealNeGate created this gist
Nov 25, 2023 .There are no files selected for viewing
import java.nio.file.*;
import java.io.*;
import java.util.*;
import java.nio.charset.StandardCharsets;

/**
 * Minimal index-building CSV reader plus a small throughput benchmark.
 *
 * <p>The parser never copies field data: it records byte offsets into the
 * input array and materializes a {@link String} only when {@code entry} is
 * called.
 */
class csv {
    /**
     * Growable list of {@code int} values backed by a primitive array, so
     * elements are never boxed.
     */
    static class IntArray {
        /** Number of valid elements in {@link #data}. */
        int cnt;
        /** Backing storage; only indices {@code [0, cnt)} are meaningful. */
        int[] data;

        /**
         * @param init_cap initial capacity of the backing array (may be 0)
         */
        IntArray(int init_cap) {
            data = new int[init_cap];
            cnt = 0;
        }

        /** Forgets all elements but keeps the backing array for reuse. */
        void reset() {
            cnt = 0;
        }

        /**
         * Returns the element at index {@code i}. The index is checked only
         * against the backing array, not against {@link #cnt}.
         */
        int get(int i) {
            return data[i];
        }

        /** Appends {@code v}, doubling the backing array when it is full. */
        void put(int v) {
            if (cnt == data.length) {
                // Math.max keeps a zero-capacity array growable; the first
                // branch keeps the doubled size from overflowing int.
                int new_cap = (cnt > Integer.MAX_VALUE / 2)
                        ? Integer.MAX_VALUE - 8
                        : Math.max(4, cnt * 2);
                data = Arrays.copyOf(data, new_cap);
            }
            data[cnt++] = v;
        }
    }

    /**
     * Offset index over a CSV byte buffer.
     *
     * <p>Fields are separated by {@code ','}; rows end with {@code \r},
     * {@code \n}, or a two-byte {@code \r\n} / {@code \n\r} pair. Inside
     * double quotes, commas and line breaks are treated as field content.
     * Quotes are not stripped or unescaped by {@link #entry}.
     */
    static class CSV {
        /**
         * For each row, the index into {@link #parsed} of its first field,
         * followed by one sentinel entry marking the end of the last row.
         */
        IntArray rows;

        /**
         * Start offset (into {@link #data}) of every field in file order,
         * followed by one sentinel entry equal to {@code data.length}.
         * A field ends where the next recorded offset begins, minus the
         * separator or line terminator that lies between them.
         */
        IntArray parsed;

        /** The raw file bytes most recently passed to {@link #parse}. */
        byte[] data;

        CSV() {
            rows = new IntArray(2 * 1024 * 1024);
            parsed = new IntArray(2 * 1024 * 1024);
        }

        /**
         * Indexes {@code data}, replacing any previously parsed content.
         * The array is retained by reference, not copied.
         *
         * <p>A quoted section with no closing quote extends to the end of
         * the input instead of running off the end of the array.
         *
         * @param data the CSV bytes to index; must not be null
         */
        void parse(byte data[]) {
            this.data = data;
            rows.reset();
            parsed.reset();

            final int len = data.length;
            int i = 0;
            // One iteration of the outer loop per row.
            while (i < len) {
                rows.put(parsed.cnt);
                parsed.put(i);

                do {
                    byte c = data[i++];
                    if (c == '\r' || c == '\n') {
                        // c is already CR or LF, so the sum matches only the
                        // mixed pairs \r\n and \n\r; consume the second byte.
                        if (i < len && c + data[i] == '\r' + '\n') {
                            i += 1;
                        }
                        break;
                    } else if (c == ',') {
                        parsed.put(i);
                    } else if (c == '"') {
                        // Skip to the closing quote, stopping at end of input.
                        while (i < len && data[i] != '"') {
                            i++;
                        }
                        if (i < len) {
                            i += 1; // step past the closing quote
                        }
                    }
                } while (i < len);
            }

            // Sentinels so the last row and last field have an end offset.
            rows.put(parsed.cnt);
            parsed.put(len);
        }

        /** Returns the number of rows found by the last {@link #parse}. */
        int row_count() {
            return Math.max(0, rows.cnt - 1);
        }

        /**
         * Returns the number of fields in row {@code i}.
         *
         * @param i zero-based row index; not validated here
         */
        int row_len(int i) {
            return rows.get(i + 1) - rows.get(i);
        }

        /**
         * Returns field {@code j} of row {@code i} decoded as UTF-8, without
         * its trailing separator or line terminator.
         *
         * @param i zero-based row index
         * @param j zero-based field index within the row
         * @throws IndexOutOfBoundsException if either index is out of range
         */
        String entry(int i, int j) {
            int row_total = row_count();
            if (i < 0 || i >= row_total) {
                throw new IndexOutOfBoundsException(
                        "row " + i + " out of range [0, " + row_total + ")");
            }
            int fields = row_len(i);
            if (j < 0 || j >= fields) {
                throw new IndexOutOfBoundsException(
                        "field " + j + " out of range [0, " + fields + ") in row " + i);
            }

            int k = rows.get(i);
            int start = parsed.get(k + j);
            int end = parsed.get(k + j + 1);

            if (j + 1 < fields) {
                // An inner field is always followed by exactly one ','.
                end -= 1;
            } else if (end > start && is_line_break(data[end - 1])) {
                // The last field of a row is followed by a one- or two-byte
                // terminator, or by nothing at all at end of input.
                end -= 1;
                if (end > start && data[end - 1] + data[end] == '\r' + '\n') {
                    end -= 1; // same pair rule parse() uses
                }
            }
            return new String(data, start, end - start, StandardCharsets.UTF_8);
        }

        /** Returns whether {@code b} is a carriage return or a line feed. */
        private static boolean is_line_break(byte b) {
            return b == '\r' || b == '\n';
        }
    }

    /**
     * Entry point.
     *
     * <p>{@code csv -single <file>} parses once and prints the third field
     * of the last row. {@code csv <file>} parses the file ten times and
     * prints per-run and average throughput.
     *
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        boolean single = args.length > 0 && args[0].equals("-single");
        if (args.length == 0 || (single && args.length < 2)) {
            System.err.println("usage: csv [-single] <file>");
            System.exit(1);
        }

        if (single) {
            // single test
            byte[] data = Files.readAllBytes(Paths.get(args[1]));
            var csv = new CSV();
            csv.parse(data);
            System.out.println(csv.entry(csv.row_count() - 1, 2));
        } else {
            final int runs = 10;
            byte[] data = Files.readAllBytes(Paths.get(args[0]));
            var csv = new CSV();

            long total = 0;
            for (int i = 0; i < runs; i++) {
                long start = System.nanoTime();
                csv.parse(data);
                long elapsed = System.nanoTime() - start;
                total += elapsed;

                double sec = elapsed / 1000000000.0;
                double bw = (double) data.length / sec;
                bw /= 1048576.0;
                // The trailing entry() argument is not consumed by the format
                // string; it is still evaluated, so each run exercises a lookup.
                System.out.printf("Run %d: %f MB/s (avg %f seconds)\n",
                        1 + i, bw, sec, csv.entry(csv.row_count() - 1, 2));
            }

            // Divide once at the end so per-run truncation does not accumulate.
            double sec = (total / (double) runs) / 1000000000.0;
            double bw = ((double) data.length / sec);
            bw /= 1048576.0;
            System.out.printf("Average nanos: %f MB/s (avg %f seconds)", bw, sec);
        }
    }
}