Skip to content

Instantly share code, notes, and snippets.

@myui
Created June 7, 2016 11:12
Show Gist options
  • Save myui/3daa816160075bbd24c62a307b0f6b4b to your computer and use it in GitHub Desktop.
Save myui/3daa816160075bbd24c62a307b0f6b4b to your computer and use it in GitHub Desktop.
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hivemall;
import hivemall.classifier.AROWClassifierUDTF;
import hivemall.classifier.AdaGradRDAUDTF;
import hivemall.classifier.ConfidenceWeightedUDTF;
import hivemall.classifier.PassiveAggressiveUDTF;
import hivemall.classifier.PerceptronUDTF;
import hivemall.classifier.SoftConfideceWeightedUDTF;
import hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF;
import hivemall.classifier.multiclass.MulticlassConfidenceWeightedUDTF;
import hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF;
import hivemall.classifier.multiclass.MulticlassPerceptronUDTF;
import hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF;
import hivemall.dataset.LogisticRegressionDataGeneratorUDTF;
import hivemall.ensemble.ArgminKLDistanceUDAF;
import hivemall.ensemble.MaxRowUDAF;
import hivemall.ensemble.MaxValueLabelUDAF;
import hivemall.ensemble.bagging.VotedAvgUDAF;
import hivemall.ensemble.bagging.WeightVotedAvgUDAF;
import hivemall.evaluation.FMeasureUDAF;
import hivemall.evaluation.LogarithmicLossUDAF;
import hivemall.evaluation.MeanAbsoluteErrorUDAF;
import hivemall.evaluation.MeanSquaredErrorUDAF;
import hivemall.evaluation.NDCGUDAF;
import hivemall.evaluation.R2UDAF;
import hivemall.evaluation.RootMeanSquaredErrorUDAF;
import hivemall.fm.FFMPredictUDF;
import hivemall.fm.FMPredictGenericUDAF;
import hivemall.fm.FactorizationMachineUDTF;
import hivemall.fm.FieldAwareFactorizationMachineUDTF;
import hivemall.ftvec.amplify.AmplifierUDTF;
import hivemall.ftvec.amplify.RandomAmplifierUDTF;
import hivemall.ftvec.conv.ConvertToDenseModelUDAF;
import hivemall.ftvec.conv.QuantifyColumnsUDTF;
import hivemall.ftvec.conv.ToDenseFeaturesUDF;
import hivemall.ftvec.conv.ToSparseFeaturesUDF;
import hivemall.ftvec.hashing.ArrayHashValuesUDF;
import hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF;
import hivemall.ftvec.hashing.FeatureHashingUDF;
import hivemall.ftvec.hashing.MurmurHash3UDF;
import hivemall.ftvec.hashing.Sha1UDF;
import hivemall.ftvec.pairing.PolynomialFeaturesUDF;
import hivemall.ftvec.pairing.PoweredFeaturesUDF;
import hivemall.ftvec.ranking.BprSamplingUDTF;
import hivemall.ftvec.ranking.ItemPairsSamplingUDTF;
import hivemall.ftvec.ranking.PopulateNotInUDTF;
import hivemall.utils.lang.Preconditions;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@Description(
name = "list_hivemall_functions",
value = "_FUNC_() - Returns a relation consists of <string funcName, array<string> type, string usage, string className>",
extended = "Usage: WITH dual AS (SELECT 1) SELECT list_fucntions() FROM dual;")
@UDFType(deterministic = true, stateful = false)
public final class ListHivemallFunctionsUDTF extends GenericUDTF {
private static final String HIVEMALL = "Hivemall";
private static final String CLASSIFICATION = "Classification";
private static final String MULTI_CLASS_CLASSIFICATION = "Multiclass Classification";
private static final String REGRESSION = "Regression";
private static final String RECOMMENDATION = "Recommendation";
private static final String MATRIX_DECOMPOSITION = "Matrix Decomposition";
private static final String K_NN = "k-Nearest Neighbor";
private static final String ENSEMBLE = "Ensemble Learning";
private static final String DATA_GEN = "Dataset Generator";
private static final String EVAL_METRICS = "Evaluation Metrics";
private static final String FEATURE_ENGINEERING = "Feature Engineering";
private static final String DECISION_TREE = "Decision Tree";
private static final String SAMPLING = "Sampling";
public ListHivemallFunctionsUDTF() {}
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
return null;
}
@Override
public void process(Object[] args) throws HiveException {
// TODO Auto-generated method stub
}
@Override
public void close() throws HiveException {}
@Nonnull
static List<FuncDecl> listFunctions() {
final List<FuncDecl> list = new ArrayList<FuncDecl>(128);
list.add(getFunctionDecl(HivemallVersionUDF.class, HIVEMALL));
list.add(getFunctionDecl(ListHivemallFunctionsUDTF.class, HIVEMALL));
addClassifiers(list);
list.add(getFunctionDecl(LogisticRegressionDataGeneratorUDTF.class, DATA_GEN));
addEnsembleFunctions(list);
addEvaluationFunctions(list);
list.add(getFunctionDecl(FactorizationMachineUDTF.class, CLASSIFICATION, REGRESSION,
RECOMMENDATION));
list.add(getFunctionDecl(FFMPredictUDF.class, CLASSIFICATION, REGRESSION, RECOMMENDATION));
list.add(getFunctionDecl(FieldAwareFactorizationMachineUDTF.class, CLASSIFICATION,
REGRESSION, RECOMMENDATION));
list.add(getFunctionDecl(FMPredictGenericUDAF.class, CLASSIFICATION, REGRESSION,
RECOMMENDATION));
addFeatureEngineeringFunctions(list);
return list;
}
private static void addClassifiers(@Nonnull final List<FuncDecl> list) {
list.add(getFunctionDecl(AdaGradRDAUDTF.class, CLASSIFICATION));
list.add(getFunctionDecl(AROWClassifierUDTF.class, CLASSIFICATION));
list.add(getFunctionDecl(AROWClassifierUDTF.AROWh.class, CLASSIFICATION));
list.add(getFunctionDecl(ConfidenceWeightedUDTF.class, CLASSIFICATION));
list.add(getFunctionDecl(PassiveAggressiveUDTF.class, CLASSIFICATION));
list.add(getFunctionDecl(PassiveAggressiveUDTF.PA1.class, CLASSIFICATION));
list.add(getFunctionDecl(PassiveAggressiveUDTF.PA2.class, CLASSIFICATION));
list.add(getFunctionDecl(PerceptronUDTF.class, CLASSIFICATION));
list.add(getFunctionDecl(SoftConfideceWeightedUDTF.SCW1.class, CLASSIFICATION));
list.add(getFunctionDecl(SoftConfideceWeightedUDTF.SCW2.class, CLASSIFICATION));
list.add(getFunctionDecl(MulticlassAROWClassifierUDTF.class, MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassAROWClassifierUDTF.AROWh.class,
MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassConfidenceWeightedUDTF.class, MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.class, MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.PA1.class,
MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.PA2.class,
MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassPerceptronUDTF.class, MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassSoftConfidenceWeightedUDTF.SCW1.class,
MULTI_CLASS_CLASSIFICATION));
list.add(getFunctionDecl(MulticlassSoftConfidenceWeightedUDTF.SCW2.class,
MULTI_CLASS_CLASSIFICATION));
}
private static void addEnsembleFunctions(@Nonnull final List<FuncDecl> list) {
list.add(getFunctionDecl(ArgminKLDistanceUDAF.class, ENSEMBLE));
list.add(getFunctionDecl(MaxRowUDAF.class, ENSEMBLE));
list.add(getFunctionDecl(MaxValueLabelUDAF.class, ENSEMBLE));
list.add(getFunctionDecl(VotedAvgUDAF.class, ENSEMBLE));
list.add(getFunctionDecl(WeightVotedAvgUDAF.class, ENSEMBLE));
}
private static void addEvaluationFunctions(@Nonnull final List<FuncDecl> list) {
list.add(getFunctionDecl(FMeasureUDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(LogarithmicLossUDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(MeanAbsoluteErrorUDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(MeanSquaredErrorUDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(NDCGUDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(R2UDAF.class, EVAL_METRICS));
list.add(getFunctionDecl(RootMeanSquaredErrorUDAF.class, EVAL_METRICS));
}
private static void addFeatureEngineeringFunctions(@Nonnull final List<FuncDecl> list) {
list.add(getFunctionDecl(AmplifierUDTF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(RandomAmplifierUDTF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ConvertToDenseModelUDAF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(QuantifyColumnsUDTF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ToDenseFeaturesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ToSparseFeaturesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ArrayHashValuesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ArrayPrefixedHashValuesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(FeatureHashingUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(MurmurHash3UDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(Sha1UDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(PolynomialFeaturesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(PoweredFeaturesUDF.class, FEATURE_ENGINEERING));
list.add(getFunctionDecl(BprSamplingUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING));
list.add(getFunctionDecl(ItemPairsSamplingUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING));
list.add(getFunctionDecl(PopulateNotInUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING));
}
private static FuncDecl getFunctionDecl(@Nonnull final Class<?> clazz, @Nonnull String... types) {
Preconditions.checkArgument(types.length > 0, "Function types are not provided");
Description desc = clazz.getAnnotation(Description.class);
String name = desc.name();
String usage = desc.value().replace("_FUNC_",
name.length() == 0 ? clazz.getSimpleName() : name);
String extended = desc.extended();
String className = clazz.getName();
return new FuncDecl(name, types, usage, extended, className);
}
static final class FuncDecl {
@Nonnull
final String name;
@Nonnull
final String[] type;
@Nonnull
final String usage;
@Nullable
final String extended;
@Nonnull
final String clazz;
FuncDecl(@Nonnull String name, @Nonnull String[] type, @Nonnull String usage,
@Nullable String extended, @Nonnull String clazz) {
this.name = name;
this.type = type;
this.usage = usage;
this.extended = extended;
this.clazz = clazz;
}
@Override
public String toString() {
return "name=" + name + ", type=" + Arrays.toString(type) + ", usage=" + usage
+ ", extended=" + extended + ", clazz=" + clazz;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment