count.java
mapper.java
reducer.java
- Create a new Eclipse project
- Give it the name
maximum_log
- The project structure should be as follows:
maximum_log
  -- src
    -- maximum_log
      - count.java
      - mapper.java
      - reducer.java
- Now
Right click on the project
->Build Path
->Add External Archives...
- Then navigate to
Other Locations
->Computer
->usr
->local
->hadoop
->share
->hadoop
->hdfs
hdfs folder
- select
hadoop-hdfs-3.3.4.jar
- select
hadoop-hdfs-client-3.3.4.jar
mapreduce folder
- select
hadoop-mapreduce-client-core-3.3.4.jar
- select
hadoop-mapreduce-client-common-3.3.4.jar
common folder
- select
hadoop-common-3.3.4.jar
- Exporting the project: click on
File
->Export
->select Java
->select JAR file
->select the name of the project
->Browse
->Downloads
->Save
->Finish
-> mycount.jar
- start the Hadoop daemons
start-all.sh
OR start-dfs.sh and start-yarn.sh
- create the directory on HDFS
hdfs dfs -mkdir /dir_name
- put the input file from the local PC onto HDFS (a Java sketch of the same step follows below)
- cd to the directory containing the input file
hdfs dfs -put input_file_Name /dir_name
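The same two steps can also be done from Java through the HDFS FileSystem API. This is only a minimal sketch: the NameNode address hdfs://localhost:9000 matches the one used later in count.java, while input_file_Name and /dir_name are placeholders for your own paths.
package sidhd;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class putfile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // connect to the local single-node HDFS (same address as in count.java)
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        // create the target directory and copy the local input file into it
        fs.mkdirs(new Path("/dir_name"));
        fs.copyFromLocalFile(new Path("input_file_Name"), new Path("/dir_name"));
        fs.close();
    }
}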
- open Eclipse and create a Java project
- create the package (the code below uses the package name sidhd)
- change the compiler version to Java 1.7
(
right click on the project
->Properties
->click on Java Compiler
->untick the JavaSE-17 compliance option
->choose version 1.7 from the dropdown
->Apply and Close
)
- checking the files on Hadoop (a Java sketch of the same check follows after this step)
===>> Go to localhost:9870
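Besides the NameNode web UI on localhost:9870, the same check can be done from Java with the FileSystem API already used in count.java. A minimal sketch, with /dir_name/input_file_Name as a placeholder path:
package sidhd;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class checkfile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        // true once the uploaded input file is visible on HDFS
        System.out.println(fs.exists(new Path("/dir_name/input_file_Name")));
        fs.close();
    }
}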
- Add the required 6 libraries
go to path ==>> Other Locations/Computer/usr/local/hadoop/share/hadoop
/hdfs/hadoop-hdfs-3.3.4.jar (No.1)
/hdfs/hadoop-hdfs-client-3.3.4.jar (No.2)
/common/hadoop-common-3.3.4.jar (No.3)
/mapreduce/hadoop-mapreduce-client-common-3.3.4.jar (No.4)
/mapreduce/hadoop-mapreduce-client-core-3.3.4.jar (No.5)
/mapreduce/hadoop-mapreduce-client-jobclient-3.3.4.jar (No.6)
- right click on the package and create the following classes
code
// mapper.java
package sidhd;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class mapper extends Mapper<Object, Text, Text, IntWritable> {
    // Each input record is one line of the access log: the key is the byte
    // offset of the line, the value is the line text.
    public void map(Object offset, Text value, Context con) throws IOException, InterruptedException {
        // " - - " makes both the space and the '-' character delimiters, so the
        // first token of a line like "127.0.0.1 - - [date] ..." is the client IP.
        StringTokenizer token = new StringTokenizer(value.toString(), " - - ");
        // Emit only the first token (the IP) with a count of 1; iterating over
        // all tokens here instead would count every word of the line.
        if (token.hasMoreTokens()) {
            con.write(new Text(token.nextToken()), new IntWritable(1));
        }
    }
}
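To see what the mapper emits, the tokenizing step can be tried on its own. The log line below is only a made-up example in the common access-log format:
import java.util.StringTokenizer;
public class tokendemo {
    public static void main(String[] args) {
        // hypothetical access-log line
        String line = "192.168.0.7 - - [10/Oct/2023:13:55:36 +0530] \"GET /index.html HTTP/1.1\" 200 2326";
        // the delimiter string " - - " makes both the space and the '-' character
        // delimiters, so the first token is the client IP address
        StringTokenizer token = new StringTokenizer(line, " - - ");
        System.out.println(token.nextToken()); // prints 192.168.0.7
    }
}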
// reducer.java
package sidhd;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Receives one IP address as the key together with all the 1s emitted for
    // it by the mappers, and writes the IP with its total number of requests.
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
// count.java
package sidhd;
import java.io.IOException;
import java.net.URI;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class count {
    // args[0] = HDFS input path, args[1] = HDFS output path (must not exist yet)
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "log count");
        job.setJarByClass(count.class);
        job.setMapperClass(mapper.class);
        job.setReducerClass(reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // run the MapReduce job and stop if it fails
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
        // read the job output back from HDFS to find the IP with the most requests
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000" + args[1]), conf);
        // the output directory holds _SUCCESS (index 0) and part-r-00000 (index 1),
        // so index 1 is the reducer output file
        Path study = fs.listStatus(new Path(args[1]))[1].getPath();
        FSDataInputStream in = fs.open(study);
        int max = 0;
        String obj;
        String result = null;
        Scanner sc = new Scanner(in);
        while (sc.hasNext()) {
            obj = sc.nextLine();
            // each output line has the form "<IP>\t<count>"
            String[] arrobj = obj.trim().split("\t+");
            int n = Integer.parseInt(arrobj[1]);
            if (n > max) {
                max = n;
                result = obj;
            }
        }
        // line with the maximum count, i.e. the most frequently occurring IP
        System.out.println(result);
        sc.close();
    }
}
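The listStatus(...)[1] indexing in count.java assumes the output directory holds exactly _SUCCESS followed by one part-r-00000 file. A slightly more defensive sketch (an alternative, not what the code above uses) picks the reducer output by name instead of by position:
package sidhd;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class findpart {
    // returns the first part-r-* file in an HDFS output directory, or null if none
    public static Path findPartFile(String outputDir) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        for (FileStatus status : fs.listStatus(new Path(outputDir))) {
            if (status.getPath().getName().startsWith("part-r-")) {
                return status.getPath();
            }
        }
        return null;
    }
}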
- run the job (the driver class in the code above is sidhd.count; the jar name depends on what was chosen during export):
hadoop jar /home/hduser/Downloads/sid.jar sidhd.count /datasett /siddheshop
- or, with the input file uploaded to the HDFS home directory first:
hdfs dfs -put data.txt
hadoop jar /home/hduser/Downloads/sid.jar sidhd.count data.txt /siddheshop
- view the output (one "IP<TAB>count" line per address):
hdfs dfs -cat /siddheshop/*