Skip to content

Instantly share code, notes, and snippets.

@tf0054
Created March 22, 2012 14:47
Show Gist options
  • Save tf0054/2158735 to your computer and use it in GitHub Desktop.
Save tf0054/2158735 to your computer and use it in GitHub Desktop.
Hack#15
package org.hadoophacks.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.hadoophacks.mapreduce.lib.input.CustomCombineFileInputFormat;
public class CombineFileGenerator extends Configured implements Tool{
@Override
public int run(String[] args)throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setJobName("CombineFIleGenerator");
job.setJarByClass(CombineFileGenerator.class);
job.setMapperClass(CombineFileGeneratorMapper.class);
job.setReducerClass(CombineFileGeneratorReducer.class);
job.setInputFormatClass(CustomCombineFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job , new Path(args[0]));
FileOutputFormat.setOutputPath(job , new Path(args[1]));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return (job.waitForCompletion(true)) ? 0 : 1;
}
public static class CombineFileGeneratorMapper extends Mapper<Text , Text , Text , Text>{
@Override
public void map(Text ikey , Text ival , Context context)throws IOException , InterruptedException{
context.write(ikey , ival);
}
}
public static class CombineFileGeneratorReducer extends Reducer<Text , Text , Text , Text>{
@Override
public void reduce(Text ikey , Iterable<Text> ivals , Context context)throws IOException , InterruptedException{
Iterator<Text> iterator = ivals.iterator();
while(iterator.hasNext()){
context.write(ikey , iterator.next());
}
}
}
public static void main(String[] args)throws Exception{
int returnCode = ToolRunner.run(new CombineFileGenerator() , args);
System.exit(returnCode);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment