Skip to content

Instantly share code, notes, and snippets.

@rickbassham
Created August 19, 2019 14:51
Show Gist options
  • Save rickbassham/0b8a76b9e2dfd83d2279d0ccb8f897e3 to your computer and use it in GitHub Desktop.
Save rickbassham/0b8a76b9e2dfd83d2279d0ccb8f897e3 to your computer and use it in GitHub Desktop.
mleap - Inconsistent schema using row transformer
package com.rickbassham.test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import ml.combust.bundle.dsl.Bundle;
import ml.combust.mleap.core.types.StructField;
import ml.combust.mleap.core.types.StructType;
import ml.combust.mleap.runtime.MleapContext;
import ml.combust.mleap.runtime.frame.DefaultLeapFrame;
import ml.combust.mleap.runtime.frame.Row;
import ml.combust.mleap.runtime.frame.RowTransformer;
import ml.combust.mleap.runtime.frame.Transformer;
import ml.combust.mleap.runtime.javadsl.BundleBuilder;
import ml.combust.mleap.runtime.javadsl.ContextBuilder;
import ml.combust.mleap.runtime.javadsl.LeapFrameBuilder;
import ml.combust.mleap.runtime.javadsl.RowTransformerSupport;
import scala.collection.JavaConversions;
/**
 * Small reproduction program: loads an MLeap bundle, prints the transformer's
 * full / input / output schemas, runs one hand-built row through both the
 * frame-based transform and a {@link RowTransformer}, and dumps the resulting
 * row value-by-value.
 *
 * <p>Each value of the row-transformer result is labelled with the field found
 * at the same position in {@code transformer.schema()}.
 * NOTE(review): nothing in this program checks that the row's slot order
 * actually matches {@code transformer.schema()}; the labels may therefore not
 * correspond to the printed values - confirm against the row transformer's
 * own schema.
 */
public class Test {

    /**
     * Entry point. Expects {@code airbnb.model.rf.zip} to be resolvable from
     * the working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        MleapContext mleapContext = new ContextBuilder().createMleapContext();

        // https://raw.githubusercontent.com/combust/mleap/master/mleap-benchmark/src/main/resources/models/airbnb.model.rf.zip
        File bundleFile = new File("airbnb.model.rf.zip");
        Bundle<Transformer> bundle = new BundleBuilder().load(bundleFile, mleapContext);
        Transformer transformer = bundle.root();

        // Schema reported by the transformer itself; also used later as the
        // source of labels for the row-transformer result.
        StructType combinedSchema = transformer.schema();
        printSchema("full", combinedSchema);
        List<StructField> labels = JavaConversions.seqAsJavaList(combinedSchema.fields());

        StructType inputSchema = transformer.inputSchema();
        printSchema("input", inputSchema);

        printSchema("output", transformer.outputSchema());

        // Build the row transformer by pushing an input-schema-only row
        // transformer through the pipeline.
        RowTransformer seed = new RowTransformerSupport().createRowTransformer(inputSchema);
        RowTransformer rowTransformer = transformer.transform(seed).get();

        LeapFrameBuilder builder = new LeapFrameBuilder();
        Row inputRow = builder.createRow(100.0, 2.0, "0.0", "Entire home/apt", "NY", "strict", 500.0, 75.0, 2.0, 1.0,
                "0.0", 4.3, 25.0);

        // Frame-based path: only the resulting schema is printed.
        DefaultLeapFrame inputFrame = builder.createFrame(inputSchema, Arrays.asList(inputRow));
        DefaultLeapFrame transformedFrame = transformer.transform(inputFrame).get();
        printSchema("frame", transformedFrame.schema());

        // Row-based path: every raw slot is printed next to a label.
        Row transformedRow = rowTransformer.transform(inputRow);
        printRow(transformedRow, labels);
    }

    /**
     * Prints the slot count of {@code row}, then one line per slot in the form
     * {@code <label> | <value>}, where the label is {@code labels.get(i)} and
     * a {@code null} value is rendered as the text {@code null}.
     */
    private static void printRow(Row row, List<StructField> labels) {
        int width = row.size();
        System.out.println(String.format("result fields: %d", width));
        for (int idx = 0; idx < width; idx++) {
            Object raw = row.getRaw(idx);
            // String.valueOf yields "null" for null and toString() otherwise.
            System.out.println(String.format("%s | %s", labels.get(idx), String.valueOf(raw)));
        }
    }

    /**
     * Prints a header line {@code "<label> fields: <count>"}, then each field
     * of {@code schema} on its own line, followed by two blank lines.
     */
    private static void printSchema(String label, StructType schema) {
        List<StructField> schemaFields = JavaConversions.seqAsJavaList(schema.fields());
        System.out.println(String.format("%s fields: %d", label, schemaFields.size()));
        for (StructField schemaField : schemaFields) {
            System.out.println(schemaField.toString());
        }
        // Two empty lines separate consecutive schema dumps.
        for (int blank = 0; blank < 2; blank++) {
            System.out.println();
        }
    }
}
full fields: 22
StructField(security_deposit,ScalarType(double,true))
StructField(bedrooms,ScalarType(double,true))
StructField(instant_bookable,ScalarType(string,true))
StructField(room_type,ScalarType(string,true))
StructField(state,ScalarType(string,true))
StructField(cancellation_policy,ScalarType(string,true))
StructField(square_feet,ScalarType(double,true))
StructField(number_of_reviews,ScalarType(double,true))
StructField(extra_people,ScalarType(double,true))
StructField(bathrooms,ScalarType(double,true))
StructField(host_is_superhost,ScalarType(string,true))
StructField(review_scores_rating,ScalarType(double,true))
StructField(cleaning_fee,ScalarType(double,true))
StructField(room_type_index,ScalarType(double,false))
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true))
StructField(cancellation_policy_index,ScalarType(double,false))
StructField(host_is_superhost_index,ScalarType(double,false))
StructField(instant_bookable_index,ScalarType(double,false))
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(state_index,ScalarType(double,false))
StructField(price_prediction,ScalarType(double,false))
input fields: 13
StructField(security_deposit,ScalarType(double,true))
StructField(bedrooms,ScalarType(double,true))
StructField(instant_bookable,ScalarType(string,true))
StructField(room_type,ScalarType(string,true))
StructField(state,ScalarType(string,true))
StructField(cancellation_policy,ScalarType(string,true))
StructField(square_feet,ScalarType(double,true))
StructField(number_of_reviews,ScalarType(double,true))
StructField(extra_people,ScalarType(double,true))
StructField(bathrooms,ScalarType(double,true))
StructField(host_is_superhost,ScalarType(string,true))
StructField(review_scores_rating,ScalarType(double,true))
StructField(cleaning_fee,ScalarType(double,true))
output fields: 9
StructField(room_type_index,ScalarType(double,false))
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true))
StructField(cancellation_policy_index,ScalarType(double,false))
StructField(host_is_superhost_index,ScalarType(double,false))
StructField(instant_bookable_index,ScalarType(double,false))
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(state_index,ScalarType(double,false))
StructField(price_prediction,ScalarType(double,false))
frame fields: 22
StructField(security_deposit,ScalarType(double,true))
StructField(bedrooms,ScalarType(double,true))
StructField(instant_bookable,ScalarType(string,true))
StructField(room_type,ScalarType(string,true))
StructField(state,ScalarType(string,true))
StructField(cancellation_policy,ScalarType(string,true))
StructField(square_feet,ScalarType(double,true))
StructField(number_of_reviews,ScalarType(double,true))
StructField(extra_people,ScalarType(double,true))
StructField(bathrooms,ScalarType(double,true))
StructField(host_is_superhost,ScalarType(string,true))
StructField(review_scores_rating,ScalarType(double,true))
StructField(cleaning_fee,ScalarType(double,true))
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true))
StructField(room_type_index,ScalarType(double,false))
StructField(host_is_superhost_index,ScalarType(double,false))
StructField(cancellation_policy_index,ScalarType(double,false))
StructField(instant_bookable_index,ScalarType(double,false))
StructField(state_index,ScalarType(double,false))
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true))
StructField(price_prediction,ScalarType(double,false))
result fields: 22
StructField(security_deposit,ScalarType(double,true)) | 100.0
StructField(bedrooms,ScalarType(double,true)) | 2.0
StructField(instant_bookable,ScalarType(string,true)) | 0.0
StructField(room_type,ScalarType(string,true)) | Entire home/apt
StructField(state,ScalarType(string,true)) | NY
StructField(cancellation_policy,ScalarType(string,true)) | strict
StructField(square_feet,ScalarType(double,true)) | 500.0
StructField(number_of_reviews,ScalarType(double,true)) | 75.0
StructField(extra_people,ScalarType(double,true)) | 2.0
StructField(bathrooms,ScalarType(double,true)) | 1.0
StructField(host_is_superhost,ScalarType(string,true)) | 0.0
StructField(review_scores_rating,ScalarType(double,true)) | 4.3
StructField(cleaning_fee,ScalarType(double,true)) | 25.0
StructField(room_type_index,ScalarType(double,false)) | DenseTensor([D@1b01a0d,List(8))
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true)) | DenseTensor([D@2ed71727,List(8))
StructField(cancellation_policy_index,ScalarType(double,false)) | 0.0
StructField(host_is_superhost_index,ScalarType(double,false)) | 0.0
StructField(instant_bookable_index,ScalarType(double,false)) | 0.0
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | 0.0
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | 1.0
StructField(state_index,ScalarType(double,false)) | DenseTensor([D@5261ec9,List(13))
StructField(price_prediction,ScalarType(double,false)) | 144.2690841182275
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment