Last active
November 27, 2023 11:41
-
-
Save RealNeGate/a7b2b81241d4057d3f4ad9d20a633ecb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.file.*; | |
import java.io.*; | |
import java.util.*; | |
import java.nio.charset.StandardCharsets; | |
/**
 * Minimal CSV indexer plus a small parsing-throughput benchmark.
 *
 * <p>The file is kept as raw bytes; parsing only records byte offsets of field
 * starts, and {@link CSV#entry(int, int)} materialises a {@code String} on demand.
 *
 * <p>Usage: {@code csv <file>} runs the benchmark, {@code csv -single <file>}
 * parses once and prints the third field of the last row.
 */
class csv {
    /** Number of timed parse passes performed by the benchmark mode. */
    private static final int BENCHMARK_RUNS = 10;

    /** Growable buffer of primitive ints, used to avoid boxing every element. */
    static class IntArray {
        /** Largest array size requested when growing (kept slightly below Integer.MAX_VALUE). */
        private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;

        /** Number of valid elements in {@link #data}. */
        int cnt;
        /** Backing storage; only indices {@code [0, cnt)} hold live values. */
        int[] data;

        /**
         * @param init_cap initial capacity; may be 0, the buffer grows on demand
         */
        IntArray(int init_cap) {
            data = new int[init_cap];
            cnt = 0;
        }

        /** Forgets all elements but keeps the allocated storage for reuse. */
        void reset() {
            cnt = 0;
        }

        /** Returns the element at {@code i}; {@code i} is not checked against {@link #cnt}. */
        int get(int i) {
            return data[i];
        }

        /** Appends {@code v}, growing the backing array when it is full. */
        void put(int v) {
            if (cnt == data.length) {
                data = Arrays.copyOf(data, grownCapacity(data.length));
            }
            data[cnt++] = v;
        }

        /**
         * Doubles {@code current}, but never returns less than 1 (so a zero-capacity
         * buffer can grow) and never overflows {@code int}.
         */
        private static int grownCapacity(int current) {
            if (current >= MAX_CAPACITY) {
                throw new IllegalStateException("IntArray cannot grow beyond " + MAX_CAPACITY + " elements");
            }
            long doubled = Math.max(1L, 2L * current);
            return (int) Math.min(doubled, (long) MAX_CAPACITY);
        }
    }

    /** Offset index over the bytes of one CSV file. */
    static class CSV {
        /** Initial capacity, in ints, of each offset buffer. */
        private static final int INITIAL_CAPACITY = 2 * 1024 * 1024;

        /**
         * {@code rows.get(r)} is the index into {@link #parsed} of the first field of
         * row {@code r}. One extra sentinel entry follows the last row, so after a
         * parse {@code rows.cnt - 1} is the number of rows.
         */
        IntArray rows;
        /**
         * {@code parsed.get(f)} is the byte offset in {@link #data} where field
         * {@code f} starts. One extra sentinel entry holding {@code data.length}
         * follows the last field.
         */
        IntArray parsed;
        /** Actual file bytes, as last passed to {@link #parse(byte[])}. */
        byte[] data;

        CSV() {
            rows = new IntArray(INITIAL_CAPACITY);
            parsed = new IntArray(INITIAL_CAPACITY);
        }

        /**
         * Indexes {@code data}, replacing any previous parse result. The array is
         * retained, not copied.
         *
         * <p>A row ends at {@code \r}, {@code \n}, or a {@code \r\n} / {@code \n\r}
         * pair. Commas and line breaks between double quotes do not split fields.
         * An unterminated quote extends to the end of the input.
         *
         * @param data the raw file contents
         * @throws NullPointerException if {@code data} is null
         */
        void parse(byte[] data) {
            Objects.requireNonNull(data, "data");
            this.data = data;
            rows.reset();
            parsed.reset();

            final int n = data.length;
            int i = 0;
            // One iteration per row.
            while (i < n) {
                rows.put(parsed.cnt);
                parsed.put(i);
                do {
                    byte c = data[i++];
                    if (c == '\r' || c == '\n') {
                        // A two-byte line break counts as a single terminator.
                        if (i < n && isLineBreakPair(c, data[i])) {
                            i++;
                        }
                        break;
                    } else if (c == ',') {
                        // i is already past the comma: start of the next field.
                        parsed.put(i);
                    } else if (c == '"') {
                        // Skip to the closing quote without running off the end of the input.
                        while (i < n && data[i] != '"') {
                            i++;
                        }
                        if (i < n) {
                            i++;
                        }
                    }
                } while (i < n);
            }

            // Sentinels so that "start of the next row/field" is defined for the last ones.
            rows.put(parsed.cnt);
            parsed.put(n);
        }

        /** Returns the number of rows found by the last parse (0 before any parse). */
        int row_count() {
            return Math.max(0, rows.cnt - 1);
        }

        /** Returns the number of fields in row {@code i}; {@code i} is not range-checked. */
        int row_len(int i) {
            return rows.get(i + 1) - rows.get(i);
        }

        /**
         * Returns the raw text of field {@code j} in row {@code i}, decoded as UTF-8.
         * Surrounding quotes, if any, are kept; the trailing delimiter (comma or line
         * break) is not included.
         *
         * @param i zero-based row index
         * @param j zero-based field index within the row
         * @throws IndexOutOfBoundsException if {@code i} or {@code j} is out of range
         */
        String entry(int i, int j) {
            Objects.checkIndex(i, row_count());
            int firstField = rows.get(i);
            int nextRowField = rows.get(i + 1);
            Objects.checkIndex(j, nextRowField - firstField);

            int field = firstField + j;
            int start = parsed.get(field);
            int end = parsed.get(field + 1);
            if (field + 1 < nextRowField) {
                // The next field is in the same row: drop the separating comma.
                end--;
            } else if (end > start && isLineBreakByte(data[end - 1])) {
                // Last field of the row: end is the next row's start (or EOF), so drop
                // the line break if there is one, which may be one byte or a pair.
                end--;
                if (end > start && isLineBreakPair(data[end - 1], data[end])) {
                    end--;
                }
            }
            return new String(data, start, end - start, StandardCharsets.UTF_8);
        }

        /** True if {@code b} is a carriage return or a line feed. */
        private static boolean isLineBreakByte(byte b) {
            return b == '\r' || b == '\n';
        }

        /** True if {@code first} and {@code second} are {@code \r\n} or {@code \n\r}. */
        private static boolean isLineBreakPair(byte first, byte second) {
            return (first == '\r' && second == '\n') || (first == '\n' && second == '\r');
        }
    }

    /**
     * Entry point.
     *
     * @param args {@code [-single] <file>}
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        boolean single = args.length > 0 && args[0].equals("-single");
        if (args.length < (single ? 2 : 1)) {
            System.err.println("usage: csv [-single] <file>");
            return;
        }

        if (single) {
            // Single test: parse once and print the third field of the last row.
            byte[] data = Files.readAllBytes(Paths.get(args[1]));
            var table = new CSV();
            table.parse(data);
            System.out.println(table.entry(table.rows.cnt - 2, 2));
        } else {
            byte[] data = Files.readAllBytes(Paths.get(args[0]));
            var table = new CSV();
            long totalNanos = 0;
            for (int run = 0; run < BENCHMARK_RUNS; run++) {
                long start = System.nanoTime();
                table.parse(data);
                long elapsed = System.nanoTime() - start;
                totalNanos += elapsed;

                double sec = elapsed / 1000000000.0;
                double bw = (double) data.length / sec;
                bw /= 1048576.0;
                // NOTE(review): "avg" here labels a single run's time. The trailing entry()
                // argument is not referenced by the format string (surplus printf arguments
                // are ignored); presumably it is there to touch the parse result each run.
                System.out.printf("Run %d: %f MB/s (avg %f seconds)\n", 1 + run, bw, sec, table.entry(table.rows.cnt - 2, 2));
            }

            // Divide once at the end instead of truncating elapsed / 10 on every run.
            double sec = (totalNanos / (double) BENCHMARK_RUNS) / 1000000000.0;
            double bw = (double) data.length / sec;
            bw /= 1048576.0;
            // NOTE(review): the label says "nanos" but the values are MB/s and seconds.
            System.out.printf("Average nanos: %f MB/s (avg %f seconds)", bw, sec);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment