ramarov · September 5, 2016 06:46
diff --git a/WordCount.java b/WordCount.java
 /**
 * The following sample is adapted from the original WordCount sample at
 * http://wiki.apache.org/hadoop/WordCount.
 */
 //package microbook.wordcount;

 import java.io.IOException;
 import java.util.StringTokenizer;

 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.GenericOptionsParser;

 /**
 * <p>
 * The word count sample counts the number of word occurrences within a set of
 * input documents using MapReduce. The code has three parts: mapper, reducer,
 * and the main program.
 * </p>
 *
 * @author Srinath Perera ([email protected])
 */
 public class WordCount {
     /** Constant count of one, emitted alongside every token by the mapper. */
     private static final IntWritable ONE = new IntWritable(1);

     /**
      * <p>
      * Mapper that splits each input value into tokens and emits every token
      * with a count of one.
      * </p>
      */
     public static class WordcountMapper extends Mapper<Object, Text, Text, IntWritable> {

         /** Output key, reused for every token instead of allocating a new Text. */
         private final Text word = new Text();

         /**
          * Tokenizes {@code value} on the default {@link StringTokenizer}
          * delimiters and writes one {@code (token, 1)} pair per token.
          *
          * @param key     input key; not used by this mapper
          * @param value   text to tokenize
          * @param context sink for the emitted pairs
          * @throws IOException          if writing to the context fails
          * @throws InterruptedException if the write is interrupted
          */
         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
             StringTokenizer itr = new StringTokenizer(value.toString());
             while (itr.hasMoreTokens()) {
                 word.set(itr.nextToken().trim());
                 context.write(word, ONE);
             }
         }
     }

     /**
      * <p>
      * Reducer that receives all the counts emitted for one key and writes the
      * key together with the sum of those counts.
      * </p>
      */
     public static class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

         /** Output value, reused for every key instead of allocating per call. */
         private final IntWritable result = new IntWritable();

         /**
          * Sums {@code values} and writes {@code (key, sum)}.
          *
          * @param key     the word being counted
          * @param values  the counts emitted for {@code key}
          * @param context sink for the aggregated pair
          * @throws IOException          if writing to the context fails
          * @throws InterruptedException if the write is interrupted
          */
         public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException,
                 InterruptedException {
             int sum = 0;
             for (IntWritable val : values) {
                 sum += val.get();
             }
             result.set(sum);
             context.write(key, result);
         }
     }

     /**
      * Configures the word count job, runs it, and exits with status 0 on
      * success, 1 on job failure, or 2 when the arguments are wrong.
      *
      * @param args generic Hadoop options followed by exactly two arguments:
      *             the input path and the output path
      * @throws Exception if the job cannot be configured or submitted
      */
     public static void main(String[] args) throws Exception {
         JobConf conf = new JobConf();
         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
         if (otherArgs.length != 2) {
             System.err.println("Usage: <in> <out>");
             System.exit(2);
         }

         // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
         Job job = Job.getInstance(conf, "word count");
         // The jar is named explicitly rather than located via setJarByClass;
         // the path has no directory part, so it is presumably resolved against
         // the working directory the job is launched from.
         job.setJar("WordCount.jar");

         job.setMapperClass(WordcountMapper.class);
         job.setReducerClass(WordcountReducer.class);

         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(IntWritable.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(IntWritable.class);

         FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
         FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
 }
	/**
	* The following sample is adapted from the original WordCount sample at
	* http://wiki.apache.org/hadoop/WordCount.
	*/
	//package microbook.wordcount;

	import java.io.IOException;
	import java.util.StringTokenizer;

	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.io.IntWritable;
	import org.apache.hadoop.io.Text;
	import org.apache.hadoop.mapred.JobConf;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Mapper;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
	import org.apache.hadoop.util.GenericOptionsParser;

	/**
	* <p>
	* The word count sample counts the number of word occurrences within a set of
	* input documents using MapReduce. The code has three parts: mapper, reducer,
	* and the main program.
	* </p>
	*
	* @author Srinath Perera ([email protected])
	*/
	public class WordCount {
	    /** Constant count of one, emitted alongside every token by the mapper. */
	    private static final IntWritable ONE = new IntWritable(1);

	    /**
	     * <p>
	     * Mapper that splits each input value into tokens and emits every token
	     * with a count of one.
	     * </p>
	     */
	    public static class WordcountMapper extends Mapper<Object, Text, Text, IntWritable> {

	        /** Output key, reused for every token instead of allocating a new Text. */
	        private final Text word = new Text();

	        /**
	         * Tokenizes {@code value} on the default {@link StringTokenizer}
	         * delimiters and writes one {@code (token, 1)} pair per token.
	         *
	         * @param key     input key; not used by this mapper
	         * @param value   text to tokenize
	         * @param context sink for the emitted pairs
	         * @throws IOException          if writing to the context fails
	         * @throws InterruptedException if the write is interrupted
	         */
	        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
	            StringTokenizer itr = new StringTokenizer(value.toString());
	            while (itr.hasMoreTokens()) {
	                word.set(itr.nextToken().trim());
	                context.write(word, ONE);
	            }
	        }
	    }

	    /**
	     * <p>
	     * Reducer that receives all the counts emitted for one key and writes the
	     * key together with the sum of those counts.
	     * </p>
	     */
	    public static class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

	        /** Output value, reused for every key instead of allocating per call. */
	        private final IntWritable result = new IntWritable();

	        /**
	         * Sums {@code values} and writes {@code (key, sum)}.
	         *
	         * @param key     the word being counted
	         * @param values  the counts emitted for {@code key}
	         * @param context sink for the aggregated pair
	         * @throws IOException          if writing to the context fails
	         * @throws InterruptedException if the write is interrupted
	         */
	        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException,
	                InterruptedException {
	            int sum = 0;
	            for (IntWritable val : values) {
	                sum += val.get();
	            }
	            result.set(sum);
	            context.write(key, result);
	        }
	    }

	    /**
	     * Configures the word count job, runs it, and exits with status 0 on
	     * success, 1 on job failure, or 2 when the arguments are wrong.
	     *
	     * @param args generic Hadoop options followed by exactly two arguments:
	     *             the input path and the output path
	     * @throws Exception if the job cannot be configured or submitted
	     */
	    public static void main(String[] args) throws Exception {
	        JobConf conf = new JobConf();
	        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
	        if (otherArgs.length != 2) {
	            System.err.println("Usage: <in> <out>");
	            System.exit(2);
	        }

	        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
	        Job job = Job.getInstance(conf, "word count");
	        // The jar is named explicitly rather than located via setJarByClass;
	        // the path has no directory part, so it is presumably resolved against
	        // the working directory the job is launched from.
	        job.setJar("WordCount.jar");

	        job.setMapperClass(WordcountMapper.class);
	        job.setReducerClass(WordcountReducer.class);

	        job.setMapOutputKeyClass(Text.class);
	        job.setMapOutputValueClass(IntWritable.class);
	        job.setOutputKeyClass(Text.class);
	        job.setOutputValueClass(IntWritable.class);

	        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
	        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
	        System.exit(job.waitForCompletion(true) ? 0 : 1);
	    }
	}