soulmachine · November 12, 2019 18:50
diff --git a/WordCountPercentage.java b/WordCountPercentage.java
 package me.soulmachine;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;

 import java.io.IOException;
 import java.util.StringTokenizer;

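 /**
  * Counts the occurrences of every word together with the overall number of words (stored under
  * the key {@code TOTAL_KEY}), i.e. the figures needed to calculate the percentage of every word.
  */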
 @SuppressWarnings("PMD.SignatureDeclareThrowsException")
 public class WordCountPercentage extends Configured implements Tool {
  /**
   * Splits every input line into whitespace-separated tokens and emits {@code (token, 1)} for each
   * one, plus a matching {@code (TOTAL_KEY, 1)} so that the overall number of tokens is summed
   * alongside the individual words.
   */
  public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Constant count of one, written for every token. */
    private static final LongWritable ONE = new LongWritable(1);
    /**
     * Key under which the total number of words (not lines) is accumulated.
     * NOTE: an input token that is literally {@code TOTAL_KEY} is merged into this total.
     */
    private static final Text TOTAL_KEY = new Text("TOTAL_KEY");
    /** Output key object, reused for every token instead of allocating a new one. */
    private final transient Text word = new Text();

    /**
     * Emits one count for each token of the line and one count towards the total.
     *
     * @param key input key of the record (not used)
     * @param value the line of text to tokenize
     * @param context receives the emitted key/value pairs
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(final LongWritable key, final Text value, final Context context)
        throws IOException, InterruptedException {
      // StringTokenizer with default delimiters splits on whitespace only.
      final StringTokenizer tokenizer = new StringTokenizer(value.toString());
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        context.write(word, ONE);
        // One total increment per token, so TOTAL_KEY ends up as the word count of the input.
        context.write(TOTAL_KEY, ONE);
      }
    }
  }


  /** Reducer that adds up all counts received for a key and writes the sum. */
  public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    /**
     * Sums the values of {@code key} and emits {@code (key, sum)}.
     *
     * @param key the key being reduced
     * @param values the partial counts collected for the key
     * @param context receives the summed result
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void reduce(final Text key, final Iterable<LongWritable> values, final Context context)
        throws IOException, InterruptedException {
      // Accumulate in a long: the values are longs, and an int accumulator combined with "+="
      // would be narrowed implicitly and wrap around silently past Integer.MAX_VALUE.
      long sum = 0L;
      for (final LongWritable val : values) {
        sum += val.get();
      }
      context.write(key, new LongWritable(sum));
    }
  }

  /**
   * Configures and runs the MapReduce job.
   *
   * <p>Expects the input path in {@code args[0]} and the output path in {@code args[1]}; any
   * further arguments are ignored.
   *
   * @param args command-line arguments: input path, output path
   * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
   * @throws Exception if setting up or running the job fails
   */
  @Override
  public int run(final String[] args) throws Exception {
    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 2) {
      System.err.println("Usage: WordCountPercentage <input path> <output path>");
      ToolRunner.printGenericCommandUsage(System.err);
      return 2;
    }
    final Path input = new Path(args[0]);
    final Path output = new Path(args[1]);

    final Configuration conf = this.getConf();
    final Job job = Job.getInstance(conf, "WordCountPercentage: " + input + "->" + output);
    job.setJarByClass(WordCountPercentage.class);

    job.setMapperClass(MyMapper.class);
    // Summing is associative, so the stock long-sum reducer can serve as combiner.
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(MyReducer.class);
    // A single reduce task receives every key, including the total.
    job.setNumReduceTasks(1);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, input);
    // Also read files located in subdirectories of the input path.
    FileInputFormat.setInputDirRecursive(job, true);
    FileOutputFormat.setOutputPath(job, output);

    return job.waitForCompletion(true) ? 0 : 1;
  }

  /**
   * Command-line entry point: runs this tool through {@link ToolRunner} and terminates the JVM
   * with the status the tool returned.
   *
   * @param args command-line arguments, passed on to {@link ToolRunner}
   * @throws Exception if running the tool fails
   */
  public static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final Tool tool = new WordCountPercentage();
    System.exit(ToolRunner.run(conf, tool, args));
  }
 }
	package me.soulmachine;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.conf.Configured;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.io.LongWritable;
	import org.apache.hadoop.io.Text;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Mapper;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
	import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
	import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
	import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
	import org.apache.hadoop.util.Tool;
	import org.apache.hadoop.util.ToolRunner;

	import java.io.IOException;
	import java.util.StringTokenizer;

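	/**
	 * Counts the occurrences of every word together with the overall number of words (stored under
	 * the key {@code TOTAL_KEY}), i.e. the figures needed to calculate the percentage of every word.
	 */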
	@SuppressWarnings("PMD.SignatureDeclareThrowsException")
	public class WordCountPercentage extends Configured implements Tool {
	/**
	 * Splits every input line into whitespace-separated tokens and emits {@code (token, 1)} for each
	 * one, plus a matching {@code (TOTAL_KEY, 1)} so that the overall number of tokens is summed
	 * alongside the individual words.
	 */
	public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
	  /** Constant count of one, written for every token. */
	  private static final LongWritable ONE = new LongWritable(1);
	  /**
	   * Key under which the total number of words (not lines) is accumulated.
	   * NOTE: an input token that is literally {@code TOTAL_KEY} is merged into this total.
	   */
	  private static final Text TOTAL_KEY = new Text("TOTAL_KEY");
	  /** Output key object, reused for every token instead of allocating a new one. */
	  private final transient Text word = new Text();

	  /**
	   * Emits one count for each token of the line and one count towards the total.
	   *
	   * @param key input key of the record (not used)
	   * @param value the line of text to tokenize
	   * @param context receives the emitted key/value pairs
	   * @throws IOException if writing to the context fails
	   * @throws InterruptedException if writing to the context is interrupted
	   */
	  @Override
	  public void map(final LongWritable key, final Text value, final Context context)
	      throws IOException, InterruptedException {
	    // StringTokenizer with default delimiters splits on whitespace only.
	    final StringTokenizer tokenizer = new StringTokenizer(value.toString());
	    while (tokenizer.hasMoreTokens()) {
	      word.set(tokenizer.nextToken());
	      context.write(word, ONE);
	      // One total increment per token, so TOTAL_KEY ends up as the word count of the input.
	      context.write(TOTAL_KEY, ONE);
	    }
	  }
	}


	/** Reducer that adds up all counts received for a key and writes the sum. */
	public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
	  /**
	   * Sums the values of {@code key} and emits {@code (key, sum)}.
	   *
	   * @param key the key being reduced
	   * @param values the partial counts collected for the key
	   * @param context receives the summed result
	   * @throws IOException if writing to the context fails
	   * @throws InterruptedException if writing to the context is interrupted
	   */
	  @Override
	  public void reduce(final Text key, final Iterable<LongWritable> values, final Context context)
	      throws IOException, InterruptedException {
	    // Accumulate in a long: the values are longs, and an int accumulator combined with "+="
	    // would be narrowed implicitly and wrap around silently past Integer.MAX_VALUE.
	    long sum = 0L;
	    for (final LongWritable val : values) {
	      sum += val.get();
	    }
	    context.write(key, new LongWritable(sum));
	  }
	}

	/**
	 * Configures and runs the MapReduce job.
	 *
	 * <p>Expects the input path in {@code args[0]} and the output path in {@code args[1]}; any
	 * further arguments are ignored.
	 *
	 * @param args command-line arguments: input path, output path
	 * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
	 * @throws Exception if setting up or running the job fails
	 */
	@Override
	public int run(final String[] args) throws Exception {
	  // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
	  if (args == null || args.length < 2) {
	    System.err.println("Usage: WordCountPercentage <input path> <output path>");
	    ToolRunner.printGenericCommandUsage(System.err);
	    return 2;
	  }
	  final Path input = new Path(args[0]);
	  final Path output = new Path(args[1]);

	  final Configuration conf = this.getConf();
	  final Job job = Job.getInstance(conf, "WordCountPercentage: " + input + "->" + output);
	  job.setJarByClass(WordCountPercentage.class);

	  job.setMapperClass(MyMapper.class);
	  // Summing is associative, so the stock long-sum reducer can serve as combiner.
	  job.setCombinerClass(LongSumReducer.class);
	  job.setReducerClass(MyReducer.class);
	  // A single reduce task receives every key, including the total.
	  job.setNumReduceTasks(1);

	  job.setOutputKeyClass(Text.class);
	  job.setOutputValueClass(LongWritable.class);

	  job.setInputFormatClass(TextInputFormat.class);
	  job.setOutputFormatClass(TextOutputFormat.class);

	  FileInputFormat.addInputPath(job, input);
	  // Also read files located in subdirectories of the input path.
	  FileInputFormat.setInputDirRecursive(job, true);
	  FileOutputFormat.setOutputPath(job, output);

	  return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Command-line entry point: runs this tool through {@link ToolRunner} and terminates the JVM
	 * with the status the tool returned.
	 *
	 * @param args command-line arguments, passed on to {@link ToolRunner}
	 * @throws Exception if running the tool fails
	 */
	public static void main(final String[] args) throws Exception {
	  final Configuration conf = new Configuration();
	  final Tool tool = new WordCountPercentage();
	  System.exit(ToolRunner.run(conf, tool, args));
	}
	}