Word Count
Word Count
Input:
GeeksforGeeks 1
Hello 2
I 2
Intern 1
am 2
an 1
Steps:
First Open Eclipse -> then select File -> New -> Java
Project ->Name it WordCount -> then Finish.
Create Three Java Classes into the project. Name
them WCDriver(having the main
function), WCMapper, WCReducer.
You have to include two Reference Libraries for that:
Right Click on Project -> then select Build Path-> Click
on Configure Build Path
In the above figure, you can see the Add External JARs
option on the Right Hand Side. Click on it and add the below
mention files. You can find these files in /usr/lib/
1. /usr/lib/hadoop-0.20-mapreduce/hadoop-core-2.6.0-mr1-
cdh5.13.0.jar
2. /usr/lib/hadoop/hadoop-common-2.6.0-cdh5.13.0.jar
Mapper Code: You have to copy paste this program into the
WCMapper Java Class file.
Java
// Importing libraries
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
// Map function
public void map(LongWritable key, Text value, OutputCollector<Text,
IntWritable> output, Reporter rep) throws IOException
{
Reducer Code: You have to copy paste this program into the
WCReducer Java Class file.
Java
// Importing libraries
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
// Reduce function
public void reduce(Text key, Iterator<IntWritable> value,
OutputCollector<Text, IntWritable> output,
Reporter rep) throws IOException
{
int count = 0;
Driver Code: You have to copy paste this program into the
WCDriver Java Class file.
Java
// Importing libraries
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WCDriver extends Configured implements Tool {
// Main Method
public static void main(String args[]) throws Exception
{
int exitCode = ToolRunner.run(new WCDriver(), args);
System.out.println(exitCode);
}
}