Last active
December 11, 2015 10:19
-
-
Save thanoojgithub/b3509b20e7726ea0e0f0 to your computer and use it in GitHub Desktop.
Hive UDF - for Gender function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.mapr.hive; | |
import org.apache.hadoop.hive.ql.exec.UDF; | |
import org.apache.hadoop.io.Text; | |
public class UDFGender extends UDF { | |
private Text result = new Text(); | |
private static final String male = "Mr."; | |
private static final String femaleM = "Mrs."; | |
private static final String femaleSingle = "Miss."; | |
public Text evaluate(Text name, Text gender, Text mStatus) { | |
System.out.println("UDFGender.evaluate() :: "+ name + " "+gender+ " "+mStatus); | |
if (name == null || gender == null) { | |
System.out.println("UDFGender.evaluate() :: "+ name + " "+gender+ " "+mStatus); | |
return null; | |
}else if((gender != null && gender.toString().equals("F")) && (mStatus != null && mStatus.toString().equals("married"))){ | |
System.out.println("UDFGender.evaluate() :: "+ name + " "+gender+ " "+mStatus); | |
result.set(femaleM +" "+ name); | |
return result; | |
}else if((gender != null && gender.toString().equals("F")) && (mStatus != null && mStatus.toString().equals("single"))) { | |
System.out.println("UDFGender.evaluate() :: "+ name + " "+gender+ " "+mStatus); | |
result.set(femaleSingle +" "+ name); | |
return result; | |
}else { | |
System.out.println("UDFGender.evaluate() :: "+ name + " "+gender+ " "+mStatus); | |
result.set(male +" "+ name); | |
return result; | |
} | |
} | |
public Text evaluate(Text name) { | |
System.out.println("UDFGender.evaluate() :: "+ name); | |
if (name == null) { | |
return null; | |
}else { | |
result.set(male +" "+ name); | |
return result; | |
} | |
} | |
} | |
/* NOTE :: | |
hive> create table thanooj.employees (ID INT, NAME STRING, GENDER STRING, MSTATUS STRING, DOB STRING, SALARY INT, TITLE STRING, DEPT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'; | |
employees.txt | |
------------- | |
10001,sriram,M,married,1989-09-12,30000,tl,d003 | |
10002,seeta,F,married,1989-09-12,30000,tl,d003 | |
10003,lakshman,M,married,1986-08-28,20000,ml,d004 | |
10004,bharatha,M,married,1986-12-01,19000,ml,d004 | |
10005,sethrugna,M,married,1989-09-12,15000,sse,d003 | |
10006,hanuma,M,single,1989-09-12,18000,sse,d003 | |
10007,ahalya,F,single,1989-09-12,18000,sse,d003 | |
hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/employees.txt' OVERWRITE INTO TABLE THANOOJ.employees; | |
hive> ADD JAR /home/ubuntu/input/UDFGender.jar; | |
Added [/home/ubuntu/input/UDFGender.jar] to class path | |
Added resources: [/home/ubuntu/input/UDFGender.jar] | |
hive> CREATE TEMPORARY FUNCTION uDFGender AS 'com.mapr.hive.UDFGender'; | |
OK | |
Time taken: 0.01 seconds | |
--------------------------------------------------------------------------------------- | |
MISC INFO :: | |
To use the UDF in Hive, we first need to package the compiled Java class in a JAR file. | |
You can do this by typing mvn package with the book’s example code. Next, we register | |
the function in the metastore and give it a name using the CREATE FUNCTION statement: | |
CREATE FUNCTION strip AS 'com.hadoopbook.hive.Strip' | |
USING JAR '/path/to/hive-examples.jar'; | |
A UDF must satisfy the following two properties: | |
A UDF must be a subclass of org.apache.hadoop.hive.ql.exec.UDF. | |
A UDF must implement at least one evaluate() method. | |
The evaluate() method is not defined by an interface, since it may take an arbitrary | |
number of arguments, of arbitrary types, and it may return a value of arbitrary type. Hive | |
introspects the UDF to find the evaluate() method that matches the Hive function that | |
was invoked. | |
When using temporary functions, it may be useful to create a .hiverc file in your home | |
directory containing the commands to define your UDFs. The file will be automatically | |
run at the beginning of each Hive session. | |
*/ | |
/* | |
OUTPUT :: | |
hive> select uDFGender(name,gender,mstatus) from thanooj.employees; | |
OK | |
UDFGender.evaluate() :: sriram M married | |
UDFGender.evaluate() :: sriram M married | |
UDFGender.evaluate() :: seeta F married | |
UDFGender.evaluate() :: seeta F married | |
UDFGender.evaluate() :: lakshman M married | |
UDFGender.evaluate() :: lakshman M married | |
UDFGender.evaluate() :: bharatha M married | |
UDFGender.evaluate() :: bharatha M married | |
UDFGender.evaluate() :: sethrugna M married | |
UDFGender.evaluate() :: sethrugna M married | |
UDFGender.evaluate() :: hanuma M single | |
UDFGender.evaluate() :: hanuma M single | |
UDFGender.evaluate() :: ahalya F single | |
UDFGender.evaluate() :: ahalya F single | |
Mr. sriram | |
Mrs. seeta | |
Mr. lakshman | |
Mr. bharatha | |
Mr. sethrugna | |
Mr. hanuma | |
Miss. ahalya | |
Time taken: 0.066 seconds, Fetched: 7 row(s) | |
hive> | |
*/ |
add input file and create table and load file - into hive metadataDB
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
added notes