Created
August 19, 2019 14:51
-
-
Save rickbassham/0b8a76b9e2dfd83d2279d0ccb8f897e3 to your computer and use it in GitHub Desktop.
mleap - Inconsistent schema using row transformer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.rickbassham.test; | |
import java.io.File; | |
import java.util.Arrays; | |
import java.util.List; | |
import ml.combust.bundle.dsl.Bundle; | |
import ml.combust.mleap.core.types.StructField; | |
import ml.combust.mleap.core.types.StructType; | |
import ml.combust.mleap.runtime.MleapContext; | |
import ml.combust.mleap.runtime.frame.DefaultLeapFrame; | |
import ml.combust.mleap.runtime.frame.Row; | |
import ml.combust.mleap.runtime.frame.RowTransformer; | |
import ml.combust.mleap.runtime.frame.Transformer; | |
import ml.combust.mleap.runtime.javadsl.BundleBuilder; | |
import ml.combust.mleap.runtime.javadsl.ContextBuilder; | |
import ml.combust.mleap.runtime.javadsl.LeapFrameBuilder; | |
import ml.combust.mleap.runtime.javadsl.RowTransformerSupport; | |
import scala.collection.JavaConversions; | |
public class Test { | |
public static void main(String[] args) { | |
MleapContext context = new ContextBuilder().createMleapContext(); | |
// https://raw.githubusercontent.com/combust/mleap/master/mleap-benchmark/src/main/resources/models/airbnb.model.rf.zip | |
File file = new File("airbnb.model.rf.zip"); | |
BundleBuilder bundleBuilder = new BundleBuilder(); | |
Bundle<Transformer> b = bundleBuilder.load(file, context); | |
Transformer transformer = b.root(); | |
StructType fullSchema = transformer.schema(); | |
printSchema("full", fullSchema); | |
List<StructField> fullFields = JavaConversions.seqAsJavaList(fullSchema.fields()); | |
StructType inputSchema = transformer.inputSchema(); | |
printSchema("input", inputSchema); | |
StructType outputSchema = transformer.outputSchema(); | |
printSchema("output", outputSchema); | |
RowTransformer rt = transformer.transform(new RowTransformerSupport().createRowTransformer(inputSchema)).get(); | |
LeapFrameBuilder frameBuilder = new LeapFrameBuilder(); | |
Row r = frameBuilder.createRow(100.0, 2.0, "0.0", "Entire home/apt", "NY", "strict", 500.0, 75.0, 2.0, 1.0, | |
"0.0", 4.3, 25.0); | |
DefaultLeapFrame frame = frameBuilder.createFrame(inputSchema, Arrays.asList(r)); | |
DefaultLeapFrame frameResult = transformer.transform(frame).get(); | |
printSchema("frame", frameResult.schema()); | |
Row result = rt.transform(r); | |
System.out.println(String.format("result fields: %d", result.size())); | |
for (int i = 0; i < result.size(); i++) { | |
Object val = result.getRaw(i); | |
if (val == null) { | |
System.out.println(String.format("%s | %s", fullFields.get(i), null)); | |
} else { | |
System.out.println(String.format("%s | %s", fullFields.get(i), val.toString())); | |
} | |
} | |
} | |
private static void printSchema(String label, StructType schema) { | |
List<StructField> fields = JavaConversions.seqAsJavaList(schema.fields()); | |
System.out.println(String.format("%s fields: %d", label, fields.size())); | |
for (StructField field : fields) { | |
System.out.println(field.toString()); | |
} | |
System.out.println(); | |
System.out.println(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
full fields: 22 | |
StructField(security_deposit,ScalarType(double,true)) | |
StructField(bedrooms,ScalarType(double,true)) | |
StructField(instant_bookable,ScalarType(string,true)) | |
StructField(room_type,ScalarType(string,true)) | |
StructField(state,ScalarType(string,true)) | |
StructField(cancellation_policy,ScalarType(string,true)) | |
StructField(square_feet,ScalarType(double,true)) | |
StructField(number_of_reviews,ScalarType(double,true)) | |
StructField(extra_people,ScalarType(double,true)) | |
StructField(bathrooms,ScalarType(double,true)) | |
StructField(host_is_superhost,ScalarType(string,true)) | |
StructField(review_scores_rating,ScalarType(double,true)) | |
StructField(cleaning_fee,ScalarType(double,true)) | |
StructField(room_type_index,ScalarType(double,false)) | |
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true)) | |
StructField(cancellation_policy_index,ScalarType(double,false)) | |
StructField(host_is_superhost_index,ScalarType(double,false)) | |
StructField(instant_bookable_index,ScalarType(double,false)) | |
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(state_index,ScalarType(double,false)) | |
StructField(price_prediction,ScalarType(double,false)) | |
input fields: 13 | |
StructField(security_deposit,ScalarType(double,true)) | |
StructField(bedrooms,ScalarType(double,true)) | |
StructField(instant_bookable,ScalarType(string,true)) | |
StructField(room_type,ScalarType(string,true)) | |
StructField(state,ScalarType(string,true)) | |
StructField(cancellation_policy,ScalarType(string,true)) | |
StructField(square_feet,ScalarType(double,true)) | |
StructField(number_of_reviews,ScalarType(double,true)) | |
StructField(extra_people,ScalarType(double,true)) | |
StructField(bathrooms,ScalarType(double,true)) | |
StructField(host_is_superhost,ScalarType(string,true)) | |
StructField(review_scores_rating,ScalarType(double,true)) | |
StructField(cleaning_fee,ScalarType(double,true)) | |
output fields: 9 | |
StructField(room_type_index,ScalarType(double,false)) | |
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true)) | |
StructField(cancellation_policy_index,ScalarType(double,false)) | |
StructField(host_is_superhost_index,ScalarType(double,false)) | |
StructField(instant_bookable_index,ScalarType(double,false)) | |
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(state_index,ScalarType(double,false)) | |
StructField(price_prediction,ScalarType(double,false)) | |
frame fields: 22 | |
StructField(security_deposit,ScalarType(double,true)) | |
StructField(bedrooms,ScalarType(double,true)) | |
StructField(instant_bookable,ScalarType(string,true)) | |
StructField(room_type,ScalarType(string,true)) | |
StructField(state,ScalarType(string,true)) | |
StructField(cancellation_policy,ScalarType(string,true)) | |
StructField(square_feet,ScalarType(double,true)) | |
StructField(number_of_reviews,ScalarType(double,true)) | |
StructField(extra_people,ScalarType(double,true)) | |
StructField(bathrooms,ScalarType(double,true)) | |
StructField(host_is_superhost,ScalarType(string,true)) | |
StructField(review_scores_rating,ScalarType(double,true)) | |
StructField(cleaning_fee,ScalarType(double,true)) | |
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | |
StructField(room_type_index,ScalarType(double,false)) | |
StructField(host_is_superhost_index,ScalarType(double,false)) | |
StructField(cancellation_policy_index,ScalarType(double,false)) | |
StructField(instant_bookable_index,ScalarType(double,false)) | |
StructField(state_index,ScalarType(double,false)) | |
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true)) | |
StructField(price_prediction,ScalarType(double,false)) | |
result fields: 22 | |
StructField(security_deposit,ScalarType(double,true)) | 100.0 | |
StructField(bedrooms,ScalarType(double,true)) | 2.0 | |
StructField(instant_bookable,ScalarType(string,true)) | 0.0 | |
StructField(room_type,ScalarType(string,true)) | Entire home/apt | |
StructField(state,ScalarType(string,true)) | NY | |
StructField(cancellation_policy,ScalarType(string,true)) | strict | |
StructField(square_feet,ScalarType(double,true)) | 500.0 | |
StructField(number_of_reviews,ScalarType(double,true)) | 75.0 | |
StructField(extra_people,ScalarType(double,true)) | 2.0 | |
StructField(bathrooms,ScalarType(double,true)) | 1.0 | |
StructField(host_is_superhost,ScalarType(string,true)) | 0.0 | |
StructField(review_scores_rating,ScalarType(double,true)) | 4.3 | |
StructField(cleaning_fee,ScalarType(double,true)) | 25.0 | |
StructField(room_type_index,ScalarType(double,false)) | DenseTensor([D@1b01a0d,List(8)) | |
StructField(features_rf,TensorType(double,Some(WrappedArray(13)),true)) | DenseTensor([D@2ed71727,List(8)) | |
StructField(cancellation_policy_index,ScalarType(double,false)) | 0.0 | |
StructField(host_is_superhost_index,ScalarType(double,false)) | 0.0 | |
StructField(instant_bookable_index,ScalarType(double,false)) | 0.0 | |
StructField(unscaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | 0.0 | |
StructField(scaled_continuous_features,TensorType(double,Some(WrappedArray(8)),true)) | 1.0 | |
StructField(state_index,ScalarType(double,false)) | DenseTensor([D@5261ec9,List(13)) | |
StructField(price_prediction,ScalarType(double,false)) | 144.2690841182275 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment