Skip to content

Instantly share code, notes, and snippets.

@maxandersen
Created December 22, 2020 00:07
Show Gist options
  • Save maxandersen/920cb33f6007f046bb8698412b57f8d8 to your computer and use it in GitHub Desktop.
Save maxandersen/920cb33f6007f046bb8698412b57f8d8 to your computer and use it in GitHub Desktop.
///usr/bin/env jbang "$0" "$@" ; exit $?
//DEPS info.picocli:picocli:4.5.0
//DEPS org.slf4j:slf4j-nop:1.7.30
//DEPS org.datavec:datavec-local:1.0.0-beta7
//DEPS joinery:joinery-dataframe:1.9
//DEPS org.apache.poi:poi:4.1.2
//DEPS tech.tablesaw:tablesaw-core:0.38.1
import static java.lang.System.out;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.Callable;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.InferredSchema;
import org.datavec.api.transform.schema.Schema;
import org.datavec.local.transforms.LocalTransformProcessRecordReader;
import joinery.DataFrame;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Parameters;
import tech.tablesaw.api.Table;
/**
 * jbang script that downloads a small sample CSV file and prints it using three
 * different Java data-frame libraries in turn: DataVec, joinery and Tablesaw.
 */
@Command(name = "dataframes", mixinStandardHelpOptions = true, version = "dataframes 0.1", description = "datavec made with jbang")
class dataframes implements Callable<Integer> {

    /** Remote CSV file that is downloaded on every run. */
    private static final String DATA_URL = "https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv";

    /** Local file (relative to the working directory) the CSV is saved to. */
    private static final String DATA_FILE = "data.csv";

    // NOTE(review): this positional parameter is declared but never read anywhere
    // in this class; it is kept so the command-line interface stays unchanged.
    @Parameters(index = "0", description = "The greeting to print", defaultValue = "World!")
    private String greeting;

    /**
     * Entry point: hands the arguments to picocli and exits with the code it returns.
     *
     * @param args command-line arguments
     */
    public static void main(String... args) {
        int exitCode = new CommandLine(new dataframes()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Downloads the sample CSV, then prints it once per library, each preceded by
     * a heading line.
     *
     * @return always {@code 0}
     * @throws Exception if the download or any of the readers fails
     */
    @Override
    public Integer call() throws Exception {
        Path data = Paths.get(DATA_FILE);
        download(new URL(DATA_URL), data);

        out.println("datavec:");
        readWithDatavec(data);

        out.println("joinery:");
        readWithJoinery(data);

        out.println("tablesaw:");
        readWithTableSaw(data);

        return 0;
    }

    /**
     * Copies the content behind {@code source} into {@code target}, replacing any
     * existing file.
     *
     * @param source URL to read from
     * @param target file to write to
     * @throws IOException if the stream cannot be opened or the copy fails
     */
    private static void download(URL source, Path target) throws IOException {
        // try-with-resources so the network stream is closed even when the copy fails.
        try (InputStream in = source.openStream()) {
            Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Reads the CSV file with Tablesaw and prints the resulting table.
     *
     * @param data path of the CSV file
     * @throws IOException if the file cannot be read
     */
    private void readWithTableSaw(Path data) throws IOException {
        out.println(Table.read().csv(data.toFile()));
    }

    /**
     * Reads the CSV file with joinery and prints the resulting data frame.
     *
     * @param data path of the CSV file
     * @throws IOException if the file cannot be read
     */
    private void readWithJoinery(Path data) throws IOException {
        out.println(DataFrame.readCsv(data.toString()));
    }

    /**
     * Reads the CSV file with DataVec and prints every record on its own line.
     *
     * @param data path of the CSV file
     * @throws IOException if the file cannot be read
     * @throws InterruptedException if initializing the reader is interrupted
     */
    void readWithDatavec(Path data) throws IOException, InterruptedException {
        // The constructor argument is presumably the number of leading lines to
        // skip (i.e. the header row) -- confirm against the CSVRecordReader docs.
        RecordReader rr = new CSVRecordReader(1);
        rr.initialize(new FileSplit(data.toFile()));

        Schema schema = new InferredSchema(data.toString()).build();

        // A TransformProcess, even an empty one, seems to be required.
        TransformProcess tp = new TransformProcess.Builder(schema).build();
        LocalTransformProcessRecordReader tprr = new LocalTransformProcessRecordReader(rr, tp);

        while (tprr.hasNext()) {
            out.println(tprr.next());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment