Created
December 22, 2020 00:07
-
-
Save maxandersen/920cb33f6007f046bb8698412b57f8d8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///usr/bin/env jbang "$0" "$@" ; exit $? | |
//DEPS info.picocli:picocli:4.5.0 | |
//DEPS org.slf4j:slf4j-nop:1.7.30 | |
//DEPS org.datavec:datavec-local:1.0.0-beta7 | |
//DEPS joinery:joinery-dataframe:1.9 | |
//DEPS org.apache.poi:poi:4.1.2 | |
//DEPS tech.tablesaw:tablesaw-core:0.38.1 | |
import static java.lang.System.out; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.net.URL; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.nio.file.StandardCopyOption; | |
import java.util.concurrent.Callable; | |
import org.datavec.api.records.reader.RecordReader; | |
import org.datavec.api.records.reader.impl.csv.CSVRecordReader; | |
import org.datavec.api.split.FileSplit; | |
import org.datavec.api.transform.TransformProcess; | |
import org.datavec.api.transform.schema.InferredSchema; | |
import org.datavec.api.transform.schema.Schema; | |
import org.datavec.local.transforms.LocalTransformProcessRecordReader; | |
import joinery.DataFrame; | |
import picocli.CommandLine; | |
import picocli.CommandLine.Command; | |
import picocli.CommandLine.Parameters; | |
import tech.tablesaw.api.Table; | |
@Command(name = "dataframes", mixinStandardHelpOptions = true, version = "dataframes 0.1", description = "datavec made with jbang") | |
class dataframes implements Callable<Integer> { | |
@Parameters(index = "0", description = "The greeting to print", defaultValue = "World!") | |
private String greeting; | |
public static void main(String... args) { | |
int exitCode = new CommandLine(new dataframes()).execute(args); | |
System.exit(exitCode); | |
} | |
@Override | |
public Integer call() throws Exception { // your business logic goes here... | |
Path data = Paths.get("data.csv"); | |
URL url = new URL("https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv"); | |
InputStream in = url.openStream(); | |
Files.copy(in, data, StandardCopyOption.REPLACE_EXISTING); | |
out.println("datavec:"); | |
readWithDatavec(data); | |
out.println("joinery:"); | |
readWithJoinery(data); | |
out.println("tablesaw:"); | |
readWithTableSaw(data); | |
return 0; | |
} | |
private void readWithTableSaw(Path data) throws IOException { | |
out.println(Table.read().csv(data.toFile())); | |
} | |
private void readWithJoinery(Path data) throws IOException { | |
out.println(DataFrame.readCsv(data.toString())); | |
} | |
void readWithDatavec(Path data) throws IOException, InterruptedException { | |
RecordReader rr = new CSVRecordReader(1); | |
rr.initialize(new FileSplit(data.toFile())); | |
Schema schema = new InferredSchema(data.toString()).build(); | |
//TransformProcess even if empty seem to be required. | |
TransformProcess tp = new TransformProcess.Builder(schema).build(); | |
LocalTransformProcessRecordReader tprr = | |
new LocalTransformProcessRecordReader(rr, tp); | |
while(tprr.hasNext()) { | |
out.println(tprr.next()); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment