@andrewm4894
Last active May 9, 2019 10:06
package org.datavec.transform.basic;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.transform.sequence.SequenceOffsetTransform;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;
import org.joda.time.DateTimeZone;
import org.nd4j.linalg.io.ClassPathResource;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class myExample {

    public static void main(String[] args) throws Exception {

        // Schema of the raw CSV input
        Schema inputDataSchema = new Schema.Builder()
                .addColumnString("DateTimeString")
                .addColumnsString("CustomerID", "MerchantID")
                .addColumnInteger("NumItemsInTransaction")
                .addColumnCategorical("MerchantCountryCode", Arrays.asList("USA", "CAN", "FR", "MX"))
                .addColumnDouble("TransactionAmountUSD", 0.0, null, false, false) //$0.0 or more, no maximum limit, no NaN and no Infinite values
                .addColumnCategorical("FraudLabel", Arrays.asList("Fraud", "Legit"))
                .build();

        // Keep only the time and amount columns, parse the date string,
        // then try to add a lagged (offset by 1) copy of the amount column
        TransformProcess tp = new TransformProcess.Builder(inputDataSchema)
                .removeAllColumnsExceptFor("DateTimeString", "TransactionAmountUSD")
                .stringToTimeTransform("DateTimeString", "YYYY-MM-DD HH:mm:ss.SSS", DateTimeZone.UTC)
                .offsetSequence(Arrays.asList("TransactionAmountUSD"), 1, SequenceOffsetTransform.OperationType.NewColumn)
                .build();

        File inputFile = new ClassPathResource("BasicDataVecExample/exampledata.csv").getFile();

        //Define input reader and output writer:
        RecordReader rr = new CSVRecordReader(0, ',');
        //RecordReader rr = new CSVVariableSlidingWindowRecordReader(5, 2);
        rr.initialize(new FileSplit(inputFile));

        // Read all records into memory
        List<List<Writable>> originalData = new ArrayList<>();
        while (rr.hasNext()) {
            originalData.add(rr.next());
        }

        // Process the data (this is the call that throws the exception below)
        List<List<Writable>> processedData = LocalTransformExecutor.execute(originalData, tp);

        // Print the first few records before and after the transform
        int numRows = 5;
        System.out.println("=== BEFORE ===");
        for (int i = 0; i <= numRows; i++) {
            System.out.println(originalData.get(i));
        }
        System.out.println("=== AFTER ===");
        for (int i = 0; i <= numRows; i++) {
            System.out.println(processedData.get(i));
        }
    }
}
/* ERROR I GET:
Exception in thread "main" java.lang.UnsupportedOperationException: SequenceOffsetTransform cannot be applied to non-sequence data
at org.datavec.api.transform.transform.sequence.SequenceOffsetTransform.map(SequenceOffsetTransform.java:159)
at org.datavec.local.transforms.transform.LocalTransformFunction.apply(LocalTransformFunction.java:48)
at org.datavec.local.transforms.transform.LocalTransformFunction.apply(LocalTransformFunction.java:32)
at org.datavec.local.transforms.LocalTransformExecutor.lambda$execute$3(LocalTransformExecutor.java:340)
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1382)
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:471)
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:499)
at org.datavec.local.transforms.LocalTransformExecutor.execute(LocalTransformExecutor.java:340)
at org.datavec.local.transforms.LocalTransformExecutor.execute(LocalTransformExecutor.java:93)
at org.datavec.transform.basic.myExample.main(myExample.java:51)
*/
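
A possible fix (a sketch, not part of the original gist, and untested): the exception says SequenceOffsetTransform can only be applied to sequence data, but CSVRecordReader plus LocalTransformExecutor.execute(...) produce plain, non-sequence records. Assuming the DataVec version on the classpath provides TransformProcess.Builder.convertToSequence(...), NumericalColumnComparator and LocalTransformExecutor.executeToSequence(...), the records can first be grouped into sequences (here: one sequence per CustomerID, ordered by the parsed time column) before offsetSequence is applied. The class name myExampleSequenceFix is just for illustration:

package org.datavec.transform.basic;

import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.sequence.comparator.NumericalColumnComparator;
import org.datavec.api.transform.transform.sequence.SequenceOffsetTransform;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;
import org.joda.time.DateTimeZone;
import org.nd4j.linalg.io.ClassPathResource;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class myExampleSequenceFix {

    public static void main(String[] args) throws Exception {

        // Same input schema as above
        Schema inputDataSchema = new Schema.Builder()
                .addColumnString("DateTimeString")
                .addColumnsString("CustomerID", "MerchantID")
                .addColumnInteger("NumItemsInTransaction")
                .addColumnCategorical("MerchantCountryCode", Arrays.asList("USA", "CAN", "FR", "MX"))
                .addColumnDouble("TransactionAmountUSD", 0.0, null, false, false)
                .addColumnCategorical("FraudLabel", Arrays.asList("Fraud", "Legit"))
                .build();

        // Keep CustomerID as well, so it can be used as the sequence key, then group the
        // records into one sequence per customer (ordered by the parsed time column) BEFORE
        // applying the sequence-only offsetSequence transform. convertToSequence(...) and
        // NumericalColumnComparator are assumed to be available in the DataVec version used here.
        TransformProcess tp = new TransformProcess.Builder(inputDataSchema)
                .removeAllColumnsExceptFor("CustomerID", "DateTimeString", "TransactionAmountUSD")
                .stringToTimeTransform("DateTimeString", "YYYY-MM-DD HH:mm:ss.SSS", DateTimeZone.UTC)
                .convertToSequence("CustomerID", new NumericalColumnComparator("DateTimeString"))
                .offsetSequence(Arrays.asList("TransactionAmountUSD"), 1, SequenceOffsetTransform.OperationType.NewColumn)
                .build();

        File inputFile = new ClassPathResource("BasicDataVecExample/exampledata.csv").getFile();
        RecordReader rr = new CSVRecordReader(0, ',');
        rr.initialize(new FileSplit(inputFile));

        List<List<Writable>> originalData = new ArrayList<>();
        while (rr.hasNext()) {
            originalData.add(rr.next());
        }

        // executeToSequence(...) (rather than execute(...)) returns sequence data:
        // one List<List<Writable>> per CustomerID, each entry being one time step
        List<List<List<Writable>>> processedData = LocalTransformExecutor.executeToSequence(originalData, tp);

        System.out.println("=== FIRST SEQUENCE ===");
        System.out.println(processedData.get(0));
    }
}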