Created
May 30, 2014 08:10
-
-
Save fivesmallq/c7c3ed5d3d8793ea9cb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.security.PrivilegedExceptionAction; | |
import java.util.Iterator; | |
import java.util.StringTokenizer; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.conf.Configured; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.io.IntWritable; | |
import org.apache.hadoop.io.LongWritable; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapred.FileInputFormat; | |
import org.apache.hadoop.mapred.FileOutputFormat; | |
import org.apache.hadoop.mapred.JobClient; | |
import org.apache.hadoop.mapred.JobConf; | |
import org.apache.hadoop.mapred.MapReduceBase; | |
import org.apache.hadoop.mapred.Mapper; | |
import org.apache.hadoop.mapred.OutputCollector; | |
import org.apache.hadoop.mapred.Reducer; | |
import org.apache.hadoop.mapred.Reporter; | |
import org.apache.hadoop.mapred.TextInputFormat; | |
import org.apache.hadoop.mapred.TextOutputFormat; | |
import org.apache.hadoop.security.UserGroupInformation; | |
import org.apache.hadoop.util.Tool; | |
import org.apache.hadoop.util.ToolRunner; | |
public class WordCountRemote extends Configured implements Tool { | |
public static final String inputPath = "/in"; | |
public static final String outputPath = "/out"; | |
public static class Map extends MapReduceBase implements | |
Mapper<LongWritable, Text, Text, IntWritable> { | |
private final static IntWritable one = new IntWritable(1); | |
private final Text word = new Text(); | |
@Override | |
public void map(LongWritable key, Text value, | |
OutputCollector<Text, IntWritable> output, Reporter reporter) | |
throws IOException { | |
String line = value.toString(); | |
StringTokenizer tokenizer = new StringTokenizer(line); | |
while (tokenizer.hasMoreTokens()) { | |
// 设置分割的字符放到 | |
word.set(tokenizer.nextToken()); | |
System.out.println("Map--->键(文本):->" + String.valueOf(word) | |
+ " " + "值(个数):->" + one); | |
output.collect(word, one); | |
} | |
} | |
} | |
public static class Reduce extends MapReduceBase implements | |
Reducer<Text, IntWritable, Text, IntWritable> { | |
@Override | |
public void reduce(Text key, Iterator<IntWritable> values, | |
OutputCollector<Text, IntWritable> output, Reporter reporter) | |
throws IOException { | |
int sum = 0; | |
while (values.hasNext()) { | |
sum += values.next().get(); | |
} | |
System.out.println("Reduce--->键(文本):->" + String.valueOf(key) | |
+ " " + "值(个数):->" + sum); | |
output.collect(key, new IntWritable(sum)); | |
} | |
} | |
@Override | |
public int run(String[] args) throws Exception { | |
// http://stackoverflow.com/questions/22231904/running-hadoop-job-remotely | |
UserGroupInformation ugi = UserGroupInformation | |
.createRemoteUser("cloudera"); | |
try { | |
ugi.doAs(new PrivilegedExceptionAction<Void>() { | |
@Override | |
public Void run() throws Exception { | |
Configuration config = new Configuration(); | |
config.set("fs.default.name", "hdfs://192.168.161.138:8020"); | |
config.set("mapred.job.tracker", "192.168.161.138:8021"); | |
config.set("hadoop.job.ugi", "cloudera"); | |
config.set("user.name", "cloudera"); | |
JobConf conf = new JobConf(config, WordCount.class); | |
conf.setJobName("WordCountNew"); | |
conf.setOutputKeyClass(Text.class); | |
conf.setOutputValueClass(IntWritable.class); | |
conf.setMapperClass(Map.class); | |
conf.setReducerClass(Reduce.class); | |
conf.setInputFormat(TextInputFormat.class); | |
conf.setOutputFormat(TextOutputFormat.class); | |
FileInputFormat | |
.setInputPaths( | |
conf, | |
new Path( | |
"hdfs://192.168.161.138:8020/user/cloudera/input/word.txt")); | |
FileOutputFormat | |
.setOutputPath( | |
conf, | |
new Path( | |
"hdfs://192.168.161.138:8020/user/cloudera/output")); | |
JobClient.runJob(conf); | |
return null; | |
} | |
}); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
return 0; | |
} | |
public static void main(String[] args) throws Exception { | |
Configuration config = new Configuration(); | |
config.set("fs.default.name", "hdfs://192.168.161.138:8020"); | |
config.set("mapred.job.tracker", "192.168.161.138:8021"); | |
config.set("hadoop.job.ugi", "cloudera"); | |
config.set("user.name", "cloudera"); | |
int res = ToolRunner.run(config, new WordCountRemote(), args); | |
System.exit(res); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment