Skip to content

Instantly share code, notes, and snippets.

@myui
Created July 16, 2015 11:17
Show Gist options
  • Save myui/c095d9a837846658882a to your computer and use it in GitHub Desktop.
Save myui/c095d9a837846658882a to your computer and use it in GitHub Desktop.
GreaterThanOrEqualsToUDTF.java
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
* Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hivemall.tools;
import hivemall.utils.hadoop.HiveUtils;
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
 * Table-generating function that filters rows by a numeric key.
 *
 * <p>Usage: {@code greater_than_or_equals_to(const double threshold, double cmpKey, c1, c2, ...)}.
 * A row is forwarded only when {@code cmpKey >= threshold}. The forwarded row consists of the
 * key as a double (column {@code key}) followed by the remaining arguments unchanged
 * (columns {@code c1}, {@code c2}, ...).
 *
 * <p>Rows whose key is NULL or NaN are never forwarded, because neither satisfies
 * {@code >=}. For the same reason a NaN threshold forwards nothing.
 *
 * <p>The same output row array and key holder are reused for every forwarded row.
 *
 * <p>NOTE(review): the class name is spelled "Greather". It is kept as-is because renaming
 * would change the fully-qualified class name that callers reference.
 */
@Description(name = "greater_than_or_equals_to", value = "_FUNC_(const double threshold, double cmpKey, *) - Returns tuples whose cmpKey is greater than or equal to threshold")
public final class GreatherThanOrEqualsToUDTF extends GenericUDTF {

    /** Inspector used to read the comparison key (2nd argument) as a double. */
    private PrimitiveObjectInspector cmpKeyOI;
    /** Lower bound (inclusive) taken from the 1st argument at initialization time. */
    private double threshold;
    /** Reusable holder for the key column of the output row. */
    private DoubleWritable keyProbe;
    /** Reusable output row: index 0 is {@link #keyProbe}, the rest are pass-through columns. */
    private Object[] rowProbe;

    /**
     * Validates the arguments and declares the output schema.
     *
     * @param argOIs inspectors for (threshold, cmpKey, pass-through columns...)
     * @return a struct inspector with fields {@code key, c1, ..., cN} where N is the number
     *         of pass-through columns
     * @throws UDFArgumentException if fewer than 3 arguments are given, or if the helper
     *         checks on the first two arguments fail
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        final int numArgs = argOIs.length;
        if (numArgs < 3) {
            throw new UDFArgumentException("greater_than_or_equals_to(const double threshold, double cmpKey, *) takes at least 3 arguments: "
                    + numArgs);
        }

        this.threshold = HiveUtils.getAsConstDouble(argOIs[0]);
        this.cmpKeyOI = HiveUtils.asDoubleCompatibleOI(argOIs[1]);

        // The threshold is consumed here and not emitted, hence one column fewer than arguments.
        this.rowProbe = new Object[numArgs - 1];
        this.keyProbe = new DoubleWritable(Double.NaN);
        rowProbe[0] = keyProbe;

        final ArrayList<String> fieldNames = new ArrayList<String>(numArgs - 1);
        final ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numArgs - 1);
        fieldNames.add("key");
        fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
        for (int i = 2; i < numArgs; i++) {
            fieldNames.add("c" + (i - 1));
            fieldOIs.add(argOIs[i]);
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Forwards the row when its key is greater than or equal to the threshold.
     *
     * @param args (threshold, cmpKey, pass-through columns...) for the current input row
     * @throws HiveException if forwarding the row fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        final Object rawKey = args[1];
        if (rawKey == null) {
            // A NULL key cannot satisfy ">="; skip it explicitly rather than depend on
            // how the inspector utility treats a null value.
            return;
        }

        final double key = PrimitiveObjectInspectorUtils.getDouble(rawKey, cmpKeyOI);
        // Written as a negated ">=" so that NaN (for which every comparison is false)
        // is filtered out instead of slipping through a "key < threshold" test.
        if (!(key >= threshold)) {
            return;
        }

        keyProbe.set(key);
        final Object[] row = rowProbe;
        // Copy the pass-through columns args[2..] into row[1..]; row[0] already holds keyProbe.
        System.arraycopy(args, 2, row, 1, args.length - 2);
        forward(row);
    }

    /**
     * Drops the references to the reusable output buffers.
     *
     * @throws HiveException declared by the overridden method; not thrown here
     */
    @Override
    public void close() throws HiveException {
        this.keyProbe = null;
        this.rowProbe = null;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment