Step 8: Set the Java build path (classpath): right-click WordCountProject -> Properties -> Java Build Path -> Libraries -> click "Add JARs" and select the three JAR files in the lib folder of the WordCount project.

java build path

NewWordCount.java

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public classNewWordCount
{
/* ~J$ hadoop jar <jar file name > < Driver Code > < input file name > < output file name >
* ~J$ hadoop jar WordCount.jar NewWordCounti/p o/p
*/
public static void main(String[] args) throws Exception
{
//Creating an object of Configuration class, which loads the configuration parameters
Configuration conf = newConfiguration();
//Creating the object of Job class and passing the confobject and Job name as arguments. The Job class allows the user to configure the job, submit it and control its execution.
Job job = new Job(conf, "wordcount");
//Setting the jar by finding where a given class came from
job.setJarByClass(NewWordCount.class);
//Setting the key class for job output data
job.setOutputKeyClass(Text.class);
//Setting the value class for job output data
job.setOutputValueClass(IntWritable.class);
//Setting the mapper for the job
job.setMapperClass(NewWordMapper.class);
//Setting the reducer for the job
job.setReducerClass(NewWordReducer.class);
//Setting the Input Format for the job
job.setInputFormatClass(TextInputFormat.class);
//Setting the Output Format for the job
job.setOutputFormatClass(TextOutputFormat.class);
//Adding a path which will act as a input for MR job. args[0] means it will use the first argument written on terminal as input path
FileInputFormat.addInputPath(job, new Path(args[0]));
//Setting the path to a directory where MR job will dump the output. args[1] means it will use the second argument written on terminal as output path
FileOutputFormat.setOutputPath(job,new Path(args[1]));
//Submitting the job to the cluster and waiting for its completion
job.waitForCompletion(true);
}
}

Big Data Hadoop Course

About Java Padho

Einstein said, "It is the supreme art of the teacher to awaken joy in creative expression and knowledge." Great teachers are scarce, as they hold nothing back and give everything away. Javapadho is the brainchild of such an extraordinary teacher, i.e. Mr. Praveen Kumar Chandaliya, the founder of SDJ Infosoft.