Last active
May 26, 2017 14:20
-
-
Save John-Colvin/980b11f2b7a7e23faf8dfb44bd9f1242 to your computer and use it in GitHub Desktop.
tsv with iopipe
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import iopipe.textpipe; | |
import iopipe.bufpipe; | |
import iopipe.stream; | |
import iopipe.buffer; | |
/**
 * Command-line entry point.
 *
 * Expects three arguments: a file name, the zero-based index of the key
 * column and the zero-based index of the value column. The file is handed to
 * `runWithEncoding!makeCounts`, and the key with the largest summed value is
 * printed, or "No entries" when the resulting table is empty.
 *
 * Returns: 0 on success, 1 when the arguments are missing or the column
 * indices are not valid unsigned integers.
 */
int main(string[] args)
{
    import std.algorithm : maxElement;
    import std.conv : ConvException, to;
    import std.stdio : stderr, writeln;

    if (args.length < 4)
    {
        // Usage errors belong on stderr so they never mix with real output.
        stderr.writefln("synopsis: %s filename keyfield valuefield", args[0]);
        return 1;
    }

    string filename = args[1];
    size_t keyFieldIndex;
    size_t valueFieldIndex;
    try
    {
        keyFieldIndex = args[2].to!size_t;
        valueFieldIndex = args[3].to!size_t;
    }
    catch (ConvException e)
    {
        // Report a bad index as a usage problem instead of letting the
        // exception escape main with a stack trace.
        stderr.writefln("%s: keyfield and valuefield must be non-negative integers (%s)",
                args[0], e.msg);
        stderr.writefln("synopsis: %s filename keyfield valuefield", args[0]);
        return 1;
    }

    auto sumByKey = runWithEncoding!makeCounts(filename, keyFieldIndex, valueFieldIndex);

    if (sumByKey.length == 0)
    {
        writeln("No entries");
    }
    else
    {
        auto maxEntry = sumByKey.byKeyValue.maxElement!"a.value";
        writeln("max_key: ", maxEntry.key, " sum: ", maxEntry.value);
    }
    return 0;
}
/**
 * Opens `file` through `openDev(file).bufd`, inspects the start of the
 * buffered window with `detectBOM`, and calls `process` instantiated with the
 * detected `UTFType`. Input whose BOM is reported as `UTFType.Unknown` is
 * processed as UTF-8.
 *
 * Something vaguely like this should be in iopipe; users shouldn't need to
 * write it.
 *
 * Params:
 *   process = template invoked as `process!(utfType)(dev, args)`
 *   file    = value passed to `openDev`
 *   args    = extra arguments passed to `process` after the device
 *
 * Returns: whatever the selected `process` instantiation returns.
 */
auto ref runWithEncoding(alias process, FileT, Args...)(FileT file, auto ref Args args)
{
    auto source = openDev(file).bufd;

    // Ask for 4 elements before looking at the window: the longest BOM
    // (UTF-32) is 4 bytes long.
    source.ensureElems(4);
    const detected = source.window.detectBOM;

    if (detected == UTFType.Unknown || detected == UTFType.UTF8)
        return process!(UTFType.UTF8)(source, args);
    if (detected == UTFType.UTF16LE)
        return process!(UTFType.UTF16LE)(source, args);
    if (detected == UTFType.UTF16BE)
        return process!(UTFType.UTF16BE)(source, args);
    if (detected == UTFType.UTF32LE)
        return process!(UTFType.UTF32LE)(source, args);
    if (detected == UTFType.UTF32BE)
        return process!(UTFType.UTF32BE)(source, args);

    // No other encoding is expected from detectBOM.
    assert(0);
}
/**
 * Reads tab-separated lines from `dev` and sums the integer found in column
 * `valueFieldIndex` for every distinct string in column `keyFieldIndex`.
 *
 * Lines with fewer than `max(keyFieldIndex, valueFieldIndex) + 1` fields are
 * skipped. Sums are accumulated in `int`.
 *
 * Params:
 *   utfType         = encoding passed to `decodeText`
 *   dev             = source pipe, consumed via `decodeText!utfType.byLine`
 *   keyFieldIndex   = zero-based index of the key column
 *   valueFieldIndex = zero-based index of the value column
 *
 * Returns: an `int[string]` mapping each key to the sum of its values.
 *
 * Throws: `ConvException` if a value field cannot be parsed as `int`.
 */
auto makeCounts(UTFType utfType, Dev)(Dev dev, size_t keyFieldIndex, size_t valueFieldIndex)
{
    import std.algorithm : max, splitter;
    import std.conv : to;
    import std.range : take;
    import std.traits : ForeachType;

    // Number of leading fields a line must have for both columns to exist.
    immutable size_t fieldsNeeded = max(keyFieldIndex, valueFieldIndex) + 1;
    enum delim = "\t";

    auto lines = dev.decodeText!utfType.byLine.asInputRange;
    alias CharT = ForeachType!(typeof(lines.front()));
    int[immutable(CharT)[]] sumByKey;

    foreach (line; lines)
    {
        // iopipe's byLine is documented to keep the line delimiter in the
        // window. Drop a trailing "\n" / "\r\n" so that the last field of a
        // line does not carry it into the key or break the int conversion.
        auto record = line;
        while (record.length != 0 && (record[$ - 1] == '\n' || record[$ - 1] == '\r'))
            record = record[0 .. $ - 1];

        auto splitting = record.splitter(delim);
        typeof(splitting.front) key;
        int value;
        size_t fieldIndex = 0;

        // Only the first `fieldsNeeded` fields are ever inspected.
        foreach (str; splitting.take(fieldsNeeded))
        {
            if (fieldIndex == keyFieldIndex)
                key = str;
            if (fieldIndex == valueFieldIndex)
                value = str.to!int;
            ++fieldIndex;
        }

        // Short lines never reach `fieldsNeeded` and are ignored.
        if (fieldIndex == fieldsNeeded)
        {
            if (auto p = key in sumByKey)
                *p += value;
            else
                sumByKey[key.idup] = value; // key is a slice of the line; copy it before storing
        }
    }

    // Callers always receive string keys, whatever the source encoding was.
    static if (is(CharT == char))
        return sumByKey;
    else
        return sumByKey.to!(int[string]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment