Skip to content

Instantly share code, notes, and snippets.

@geofferyzh
Created August 27, 2012 14:52
Show Gist options
  • Save geofferyzh/3489203 to your computer and use it in GitHub Desktop.
Save geofferyzh/3489203 to your computer and use it in GitHub Desktop.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
 * Mapper that parses tab-separated text lines and emits the first column as
 * the output key and the second column as the output value.
 *
 * <p>Columns beyond the second are ignored. Lines that do not yield at least
 * two columns (no tab, or an empty trailing column, which
 * {@link String#split(String)} drops) are skipped and counted under the
 * {@code pymk_mapper / MALFORMED_RECORDS} counter instead of failing the task
 * with an {@link ArrayIndexOutOfBoundsException}.
 */
public class pymk_mapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, Text> {

    /**
     * Splits one input line on tab characters and forwards the first two columns.
     *
     * @param key      input key supplied by the input format; not used here
     * @param value    one line of tab-separated text
     * @param output   collector receiving the (first column, second column) pair
     * @param reporter used to count skipped malformed lines; may be {@code null}
     * @throws IOException if the collector fails to accept the pair
     */
    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        String[] fields = value.toString().split("\t");

        // A line without two columns cannot produce a key/value pair; skip it
        // rather than letting fields[1] throw and kill the whole task.
        if (fields.length < 2) {
            if (reporter != null) {
                reporter.incrCounter("pymk_mapper", "MALFORMED_RECORDS", 1);
            }
            return;
        }

        output.collect(new Text(fields[0]), new Text(fields[1]));
    }
}
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
 * Reducer that collapses all values seen for a key into a single
 * comma-separated list of distinct values, kept in first-seen order.
 *
 * <p>Duplicates are detected by exact string equality on each value. A key
 * with no values produces an empty output value.
 */
public class pymk_reducer extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {

    /**
     * Emits one record per key whose value is the distinct input values joined by {@code ","}.
     *
     * @param key      the grouping key, emitted unchanged
     * @param values   all values associated with {@code key}
     * @param output   collector receiving the (key, joined distinct values) pair
     * @param reporter not used
     * @throws IOException if the collector fails to accept the pair
     */
    @Override
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        // LinkedHashSet gives exact-match de-duplication while preserving the
        // order in which values first appeared. toString() copies each value
        // out of the Text instance before the iterator advances.
        Set<String> distinct = new LinkedHashSet<String>();
        while (values.hasNext()) {
            distinct.add(values.next().toString());
        }

        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String item : distinct) {
            if (!first) {
                joined.append(',');
            }
            joined.append(item);
            first = false;
        }

        output.collect(key, new Text(joined.toString()));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment