Created
September 5, 2016 06:46
-
-
Save ramarov/c24bb2cf15bf18dc56bd28248a382b89 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * The following sample is adapted from the original WordCount sample at
 * http://wiki.apache.org/hadoop/WordCount.
 */
//package microbook.wordcount; | |
import java.io.IOException; | |
import java.util.StringTokenizer; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.io.IntWritable; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapred.JobConf; | |
import org.apache.hadoop.mapreduce.Job; | |
import org.apache.hadoop.mapreduce.Mapper; | |
import org.apache.hadoop.mapreduce.Reducer; | |
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | |
import org.apache.hadoop.util.GenericOptionsParser; | |
/** | |
* <p> | |
* The word count sample counts the number of word occurrences within a set of | |
* input documents using MapReduce. The code has three parts: mapper, reducer, | |
* and the main program. | |
* </p> | |
* | |
* @author Srinath Perera ([email protected]) | |
*/ | |
public class WordCount { | |
private final static IntWritable one = new IntWritable(1); | |
/** | |
* <p> | |
* Hadoop invokes map function once for each line in the input file, | |
* and it emits each word in the input line against one. | |
* </p> | |
*/ | |
public static class WordcountMapper extends Mapper<Object, Text, Text, IntWritable> { | |
private Text word = new Text(); | |
public void map(Object key, Text value, Context context) throws IOException, InterruptedException { | |
StringTokenizer itr = new StringTokenizer(value.toString()); | |
System.out.println("itr = " + itr); | |
System.out.println("value = " + value); | |
System.out.println("Key = " + key); | |
while (itr.hasMoreTokens()) { | |
//System.out.println("itr_nextToken = " + itr.nextToken()); | |
word.set(itr.nextToken().trim()); | |
System.out.println("word = " + word); | |
//context.write(new Text(itr.nextToken()), one); | |
/* | |
if (word.toString().equals("hello")) | |
{ | |
context.write(word,one); | |
} | |
*/ | |
context.write(word,one); | |
} | |
} | |
} | |
/** | |
* <p> | |
* Reduce function receives all the values that has the same key as the | |
* input, and it output the key and the number of occurrences of the key as | |
* the output. | |
* </p> | |
*/ | |
public static class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { | |
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, | |
InterruptedException { | |
int sum = 0; | |
System.out.println("reduce key = " + key); | |
for (IntWritable val : values) { | |
sum += val.get(); | |
} | |
context.write(key, new IntWritable(sum)); | |
} | |
} | |
/** | |
* @param args | |
* @throws Exception | |
*/ | |
public static void main(String[] args) throws Exception { | |
JobConf conf = new JobConf(); | |
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); | |
if (otherArgs.length != 2) { | |
System.err.println("Usage: <in> <out>"); | |
System.exit(2); | |
} | |
Job job = new Job(conf, "word count"); | |
job.setJar("WordCount.jar"); | |
//job.setJarByClass(WordCount.class); | |
//job.setJarByClass(WordcountMapper.class); | |
//job.setJarByClass(WordcountReducer.class); | |
job.setMapperClass(WordcountMapper.class); | |
job.setReducerClass(WordcountReducer.class); | |
job.setMapOutputKeyClass(Text.class); | |
job.setMapOutputValueClass(IntWritable.class); | |
job.setOutputKeyClass(Text.class); | |
job.setOutputValueClass(IntWritable.class); | |
FileInputFormat.addInputPath(job, new Path(otherArgs[0])); | |
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); | |
System.exit(job.waitForCompletion(true) ? 0 : 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment