Created
June 7, 2016 11:12
-
-
Save myui/3daa816160075bbd24c62a307b0f6b4b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Hivemall: Hive scalable Machine Learning Library
 *
 * Copyright (C) 2015 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package hivemall;

import hivemall.classifier.AROWClassifierUDTF;
import hivemall.classifier.AdaGradRDAUDTF;
import hivemall.classifier.ConfidenceWeightedUDTF;
import hivemall.classifier.PassiveAggressiveUDTF;
import hivemall.classifier.PerceptronUDTF;
import hivemall.classifier.SoftConfideceWeightedUDTF;
import hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF;
import hivemall.classifier.multiclass.MulticlassConfidenceWeightedUDTF;
import hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF;
import hivemall.classifier.multiclass.MulticlassPerceptronUDTF;
import hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF;
import hivemall.dataset.LogisticRegressionDataGeneratorUDTF;
import hivemall.ensemble.ArgminKLDistanceUDAF;
import hivemall.ensemble.MaxRowUDAF;
import hivemall.ensemble.MaxValueLabelUDAF;
import hivemall.ensemble.bagging.VotedAvgUDAF;
import hivemall.ensemble.bagging.WeightVotedAvgUDAF;
import hivemall.evaluation.FMeasureUDAF;
import hivemall.evaluation.LogarithmicLossUDAF;
import hivemall.evaluation.MeanAbsoluteErrorUDAF;
import hivemall.evaluation.MeanSquaredErrorUDAF;
import hivemall.evaluation.NDCGUDAF;
import hivemall.evaluation.R2UDAF;
import hivemall.evaluation.RootMeanSquaredErrorUDAF;
import hivemall.fm.FFMPredictUDF;
import hivemall.fm.FMPredictGenericUDAF;
import hivemall.fm.FactorizationMachineUDTF;
import hivemall.fm.FieldAwareFactorizationMachineUDTF;
import hivemall.ftvec.amplify.AmplifierUDTF;
import hivemall.ftvec.amplify.RandomAmplifierUDTF;
import hivemall.ftvec.conv.ConvertToDenseModelUDAF;
import hivemall.ftvec.conv.QuantifyColumnsUDTF;
import hivemall.ftvec.conv.ToDenseFeaturesUDF;
import hivemall.ftvec.conv.ToSparseFeaturesUDF;
import hivemall.ftvec.hashing.ArrayHashValuesUDF;
import hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF;
import hivemall.ftvec.hashing.FeatureHashingUDF;
import hivemall.ftvec.hashing.MurmurHash3UDF;
import hivemall.ftvec.hashing.Sha1UDF;
import hivemall.ftvec.pairing.PolynomialFeaturesUDF;
import hivemall.ftvec.pairing.PoweredFeaturesUDF;
import hivemall.ftvec.ranking.BprSamplingUDTF;
import hivemall.ftvec.ranking.ItemPairsSamplingUDTF;
import hivemall.ftvec.ranking.PopulateNotInUDTF;
import hivemall.utils.lang.Preconditions;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

@Description( | |
name = "list_hivemall_functions", | |
value = "_FUNC_() - Returns a relation consists of <string funcName, array<string> type, string usage, string className>", | |
extended = "Usage: WITH dual AS (SELECT 1) SELECT list_fucntions() FROM dual;") | |
@UDFType(deterministic = true, stateful = false) | |
public final class ListHivemallFunctionsUDTF extends GenericUDTF { | |
private static final String HIVEMALL = "Hivemall"; | |
private static final String CLASSIFICATION = "Classification"; | |
private static final String MULTI_CLASS_CLASSIFICATION = "Multiclass Classification"; | |
private static final String REGRESSION = "Regression"; | |
private static final String RECOMMENDATION = "Recommendation"; | |
private static final String MATRIX_DECOMPOSITION = "Matrix Decomposition"; | |
private static final String K_NN = "k-Nearest Neighbor"; | |
private static final String ENSEMBLE = "Ensemble Learning"; | |
private static final String DATA_GEN = "Dataset Generator"; | |
private static final String EVAL_METRICS = "Evaluation Metrics"; | |
private static final String FEATURE_ENGINEERING = "Feature Engineering"; | |
private static final String DECISION_TREE = "Decision Tree"; | |
private static final String SAMPLING = "Sampling"; | |
public ListHivemallFunctionsUDTF() {} | |
@Override | |
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { | |
return null; | |
} | |
@Override | |
public void process(Object[] args) throws HiveException { | |
// TODO Auto-generated method stub | |
} | |
@Override | |
public void close() throws HiveException {} | |
@Nonnull | |
static List<FuncDecl> listFunctions() { | |
final List<FuncDecl> list = new ArrayList<FuncDecl>(128); | |
list.add(getFunctionDecl(HivemallVersionUDF.class, HIVEMALL)); | |
list.add(getFunctionDecl(ListHivemallFunctionsUDTF.class, HIVEMALL)); | |
addClassifiers(list); | |
list.add(getFunctionDecl(LogisticRegressionDataGeneratorUDTF.class, DATA_GEN)); | |
addEnsembleFunctions(list); | |
addEvaluationFunctions(list); | |
list.add(getFunctionDecl(FactorizationMachineUDTF.class, CLASSIFICATION, REGRESSION, | |
RECOMMENDATION)); | |
list.add(getFunctionDecl(FFMPredictUDF.class, CLASSIFICATION, REGRESSION, RECOMMENDATION)); | |
list.add(getFunctionDecl(FieldAwareFactorizationMachineUDTF.class, CLASSIFICATION, | |
REGRESSION, RECOMMENDATION)); | |
list.add(getFunctionDecl(FMPredictGenericUDAF.class, CLASSIFICATION, REGRESSION, | |
RECOMMENDATION)); | |
addFeatureEngineeringFunctions(list); | |
return list; | |
} | |
private static void addClassifiers(@Nonnull final List<FuncDecl> list) { | |
list.add(getFunctionDecl(AdaGradRDAUDTF.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(AROWClassifierUDTF.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(AROWClassifierUDTF.AROWh.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(ConfidenceWeightedUDTF.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(PassiveAggressiveUDTF.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(PassiveAggressiveUDTF.PA1.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(PassiveAggressiveUDTF.PA2.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(PerceptronUDTF.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(SoftConfideceWeightedUDTF.SCW1.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(SoftConfideceWeightedUDTF.SCW2.class, CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassAROWClassifierUDTF.class, MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassAROWClassifierUDTF.AROWh.class, | |
MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassConfidenceWeightedUDTF.class, MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.class, MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.PA1.class, | |
MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassPassiveAggressiveUDTF.PA2.class, | |
MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassPerceptronUDTF.class, MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassSoftConfidenceWeightedUDTF.SCW1.class, | |
MULTI_CLASS_CLASSIFICATION)); | |
list.add(getFunctionDecl(MulticlassSoftConfidenceWeightedUDTF.SCW2.class, | |
MULTI_CLASS_CLASSIFICATION)); | |
} | |
private static void addEnsembleFunctions(@Nonnull final List<FuncDecl> list) { | |
list.add(getFunctionDecl(ArgminKLDistanceUDAF.class, ENSEMBLE)); | |
list.add(getFunctionDecl(MaxRowUDAF.class, ENSEMBLE)); | |
list.add(getFunctionDecl(MaxValueLabelUDAF.class, ENSEMBLE)); | |
list.add(getFunctionDecl(VotedAvgUDAF.class, ENSEMBLE)); | |
list.add(getFunctionDecl(WeightVotedAvgUDAF.class, ENSEMBLE)); | |
} | |
private static void addEvaluationFunctions(@Nonnull final List<FuncDecl> list) { | |
list.add(getFunctionDecl(FMeasureUDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(LogarithmicLossUDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(MeanAbsoluteErrorUDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(MeanSquaredErrorUDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(NDCGUDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(R2UDAF.class, EVAL_METRICS)); | |
list.add(getFunctionDecl(RootMeanSquaredErrorUDAF.class, EVAL_METRICS)); | |
} | |
private static void addFeatureEngineeringFunctions(@Nonnull final List<FuncDecl> list) { | |
list.add(getFunctionDecl(AmplifierUDTF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(RandomAmplifierUDTF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ConvertToDenseModelUDAF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(QuantifyColumnsUDTF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ToDenseFeaturesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ToSparseFeaturesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ArrayHashValuesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ArrayPrefixedHashValuesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(FeatureHashingUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(MurmurHash3UDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(Sha1UDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(PolynomialFeaturesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(PoweredFeaturesUDF.class, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(BprSamplingUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(ItemPairsSamplingUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING)); | |
list.add(getFunctionDecl(PopulateNotInUDTF.class, SAMPLING, RECOMMENDATION, FEATURE_ENGINEERING)); | |
} | |
private static FuncDecl getFunctionDecl(@Nonnull final Class<?> clazz, @Nonnull String... types) { | |
Preconditions.checkArgument(types.length > 0, "Function types are not provided"); | |
Description desc = clazz.getAnnotation(Description.class); | |
String name = desc.name(); | |
String usage = desc.value().replace("_FUNC_", | |
name.length() == 0 ? clazz.getSimpleName() : name); | |
String extended = desc.extended(); | |
String className = clazz.getName(); | |
return new FuncDecl(name, types, usage, extended, className); | |
} | |
static final class FuncDecl { | |
@Nonnull | |
final String name; | |
@Nonnull | |
final String[] type; | |
@Nonnull | |
final String usage; | |
@Nullable | |
final String extended; | |
@Nonnull | |
final String clazz; | |
FuncDecl(@Nonnull String name, @Nonnull String[] type, @Nonnull String usage, | |
@Nullable String extended, @Nonnull String clazz) { | |
this.name = name; | |
this.type = type; | |
this.usage = usage; | |
this.extended = extended; | |
this.clazz = clazz; | |
} | |
@Override | |
public String toString() { | |
return "name=" + name + ", type=" + Arrays.toString(type) + ", usage=" + usage | |
+ ", extended=" + extended + ", clazz=" + clazz; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment