Created
May 21, 2014 08:46
-
-
Save 91pavan/92a8b8c6dd24990efa53 to your computer and use it in GitHub Desktop.
CustomMap pig UDF to return non-null bag of tuples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.pigudf; | |
import java.io.IOException; | |
import java.util.Map; | |
import java.util.HashMap; | |
import org.apache.pig.EvalFunc; | |
import org.apache.pig.data.DataType; | |
import org.apache.pig.data.Tuple; | |
import org.apache.pig.impl.logicalLayer.schema.Schema; | |
/*
 * Build:
 *   mkdir target
 *   javac -d target -cp ../pig.jar CustomMap.java
 *   jar -cf target/CustomMap.jar -C target/ .
 *
 * Pig usage (file.pig):
 *   REGISTER 'CustomMap.jar';
 *   DEFINE CustomMap com.example.pigudf.CustomMap();
 *
 * This class generates a map out of the parameters passed to it and filters out
 * any key-value pair whose value is null or empty. For example,
 *   T = foreach U generate CustomMap($0, $1, $2, $3);
 * generates the map $0->$1, $2->$3.
 */
public class CustomMap extends EvalFunc<Map<String, String>> { | |
@Override | |
public Map<String,String> exec(Tuple input) throws IOException { | |
if(input == null || input.size() < 2) { | |
return null; | |
} | |
try { | |
Map<String, String> output = new HashMap<String, String>(); | |
for(int i=0; i<input.size();i=i+2) { | |
String key = (String)input.get(i); | |
String value = (String)input.get(i+1); | |
if(val!=null&&val.length()>0){ | |
output.put(key,value); | |
} | |
} | |
return output; | |
catch (ClassCastException e){ | |
throw new RuntimeException("Map key must be a String"); | |
} catch (ArrayIndexOutOfBoundsException e){ | |
throw new RuntimeException("Function input must have even number of parameters"); | |
} catch (Exception e) { | |
throw new RuntimeException("Error while creating a map", e); | |
} | |
} | |
@Override | |
public Schema outputSchema(Schema input) { | |
return new Schema(new Schema.FieldSchema(null, DataType.MAP)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment