WordCountPartitioner.java
Objective:
Write a MapReduce program to count the occurrences of each word in a file.
Use a custom partitioner to partition key/value pairs by the first letter of each word.
Partitioner class (WordCountPartitioner.java)
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class WordCountPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String word = key.toString();
        if (word.isEmpty()) {
            return 0; // guard: send empty tokens to the default partition
        }
        char alphabet = word.toUpperCase().charAt(0);
        int partitionNumber = 0;
        switch (alphabet) {
            case 'A': partitionNumber = 1; break;
            case 'B': partitionNumber = 2; break;
            case 'C': partitionNumber = 3; break;
            case 'D': partitionNumber = 4; break;
            case 'E': partitionNumber = 5; break;
            case 'F': partitionNumber = 6; break;
            case 'G': partitionNumber = 7; break;
            case 'H': partitionNumber = 8; break;
            case 'I': partitionNumber = 9; break;
            case 'J': partitionNumber = 10; break;
            case 'K': partitionNumber = 11; break;
            case 'L': partitionNumber = 12; break;
            case 'M': partitionNumber = 13; break;
            case 'N': partitionNumber = 14; break;
            case 'O': partitionNumber = 15; break;
            case 'P': partitionNumber = 16; break;
            case 'Q': partitionNumber = 17; break;
            case 'R': partitionNumber = 18; break;
            case 'S': partitionNumber = 19; break;
            case 'T': partitionNumber = 20; break;
            case 'U': partitionNumber = 21; break;
            case 'V': partitionNumber = 22; break;
            case 'W': partitionNumber = 23; break;
            case 'X': partitionNumber = 24; break;
            case 'Y': partitionNumber = 25; break;
            case 'Z': partitionNumber = 26; break;
            default: partitionNumber = 0; break; // non-alphabetic first character
        }
        // Hadoop expects the partition number to be in the range [0, numPartitions-1]
        return partitionNumber % numPartitions;
    }
}
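As a quick sanity check, the partitioner maps words to partitions as follows (a hypothetical local test harness, not part of the MapReduce job; it assumes WordCountPartitioner is on the classpath):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class PartitionerCheck { // hypothetical helper class, for illustration only
    public static void main(String[] args) {
        WordCountPartitioner p = new WordCountPartitioner();
        IntWritable one = new IntWritable(1);
        System.out.println(p.getPartition(new Text("Hadoop"), one, 27)); // 8  ('H' is the 8th letter)
        System.out.println(p.getPartition(new Text("zebra"), one, 27));  // 26 ('Z' -> 26)
        System.out.println(p.getPartition(new Text("123"), one, 27));    // 0  (non-letter -> default)
    }
}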
Driver Class (WordCountDriver.java)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountDriver {

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: WordCountDriver <input path> <output path>");
            System.exit(-1);
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Word Count with Custom Partitioner");
        job.setJarByClass(WordCountDriver.class);
        // Set Mapper and Reducer classes (sketched after this driver)
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Set output key and value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set Partitioner class
        job.setPartitionerClass(WordCountPartitioner.class);
        // Set number of reduce tasks (one per partition)
        job.setNumReduceTasks(27); // A–Z + default
        // Set input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit job and wait for completion
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
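The driver references WordCountMapper and WordCountReducer, which are not listed in this handout. A minimal sketch of what they are assumed to look like (standard word-count logic for the new mapreduce API, one class per file):

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (word, 1) for every whitespace-separated token in the line
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get(); // add up the 1s emitted for this word
        }
        context.write(key, new IntWritable(sum));
    }
}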
Input Data
A text file containing lines such as:
• Welcome to Hadoop Session
• Introduction to Hadoop
• Introducing Hive
• Hive Session
• Pig Session
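With this input and the partitioner above (27 reducers, as set in the driver), each word is routed by its first letter; for example:
Hadoop  -> count 2, partition 8  ('H')
Hive    -> count 2, partition 8  ('H')
Session -> count 3, partition 19 ('S')
to      -> count 2, partition 20 ('T')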
MapReduce Searching
The input file (student.csv, student records) is stored in HDFS:
1001,John,45
1002,Jack,39
1003,Alex,44
1004,Smith,38
1005,Bob,33
Driver (WordSearcher.java) sets the job configuration and specifies the keyword to search
for (e.g., "Jack").
Mapper (WordSearchMapper.java) reads the input line by line and checks whether the keyword is present.
• If it is, the mapper emits the matching line as the key and "fileName,lineNumber,charOffset" as the value.
Reducer (WordSearchReducer.java) simply writes out the key/value pairs received from the mapper.
The final output, containing only the lines that matched the keyword, is stored in the HDFS output path.
1. Mapper (WordSearchMapper.java)
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
public class WordSearchMapper extends Mapper<LongWritable, Text, Text, Text> {
    static String keyword;
    static int pos = 0; // line counter, local to each map task (positions are per-split)

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        keyword = configuration.get("keyword");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        InputSplit i = context.getInputSplit(); // get the input split for this map task
        FileSplit f = (FileSplit) i;
        String fileName = f.getPath().getName();
        pos++; // advance the line counter for every input line
        if (value.toString().contains(keyword)) {
            int wordPos = value.find(keyword); // byte offset of the keyword within the line
            context.write(value, new Text(fileName + "," + pos + "," + wordPos));
        }
    }
}
2. Reducer (WordSearchReducer.java)
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordSearchReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text val : values) {
            context.write(key, val);
        }
    }
}
3. Driver (WordSearcher.java)
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordSearcher {

    public static void main(String[] args) throws IOException, InterruptedException,
            ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.set("keyword", "Jack"); // keyword to search
        Job job = Job.getInstance(conf, "Word Search");
        job.setJarByClass(WordSearcher.class);
        job.setMapperClass(WordSearchMapper.class);
        job.setReducerClass(WordSearchReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path("/mapreduce/student.csv"));
        FileOutputFormat.setOutputPath(job, new Path("/mapreduce/output/search"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
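With the sample student.csv above and the keyword "Jack", only the second line matches, so the output contains a single record (assuming the file is read as a single split, so the mapper's line counter equals the file's line number):
1002,Jack,39    student.csv,2,5
Here 2 is the line number and 5 is the character offset of "Jack" within the line.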
MapReduce Sorting
Objective & Input Data
• Objective:
Write a MapReduce program that sorts student records by student name. The sorting itself comes from the shuffle phase: the mapper emits the name as the key, and MapReduce sorts keys before they reach the reducer.
• Input Data (student.csv):
1001,John,45
1002,Jack,39
1003,Alex,44
1004,Smith,38
1005,Bob,33
Expected Sorted Output (by name):
1003,Alex,44
1005,Bob,33
1002,Jack,39
1001,John,45
1004,Smith,38
1. Mapper (SortMapper inside SortStudNames.java)
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class SortStudNames {

    public static class SortMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] token = value.toString().split(",");
            // token[0] = ID, token[1] = Name, token[2] = Marks
            context.write(new Text(token[1]), new Text(token[0] + "," + token[2]));
        }
    }
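With the sample data, the map phase emits the following (name, "ID,Marks") pairs; the shuffle delivers them to the reducer already sorted by key:
Alex  -> 1003,44
Bob   -> 1005,33
Jack  -> 1002,39
John  -> 1001,45
Smith -> 1004,38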
2. Reducer (SortReducer inside SortStudNames.java)
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
    public static class SortReducer extends Reducer<Text, Text, NullWritable, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text details : values) {
                // details = "ID,Marks"; rebuild the full record as ID,Name,Marks
                String[] token = details.toString().split(",");
                context.write(NullWritable.get(),
                        new Text(token[0] + "," + key.toString() + "," + token[1]));
            }
        }
    }
3. Driver (main method inside SortStudNames.java)
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    public static void main(String[] args) throws IOException, InterruptedException,
            ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Sort Students by Name");
        job.setJarByClass(SortStudNames.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // The reducer emits NullWritable keys, so the job's output key class must match
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1); // a single reducer yields one globally sorted output file
        FileInputFormat.setInputPaths(job, new Path("/mapreduce/student.csv"));
        FileOutputFormat.setOutputPath(job, new Path("/mapreduce/output/sorted"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
MapReduce Word Count
This classic word count uses the older org.apache.hadoop.mapred API.
Mapper Code: Copy this program into the WCMapper Java class file.
// Importing libraries
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class WCMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Map function
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter rep) throws IOException {
        String line = value.toString();
        // Splitting the line on spaces
        for (String word : line.split(" ")) {
            if (word.length() > 0) {
                output.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}
Reducer Code: Copy this program into the WCReducer Java class file.
// Importing libraries
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class WCReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    // Reduce function
    public void reduce(Text key, Iterator<IntWritable> value,
                       OutputCollector<Text, IntWritable> output, Reporter rep) throws IOException {
        int count = 0;
        // Summing the counts for each word
        while (value.hasNext()) {
            IntWritable i = value.next();
            count += i.get();
        }
        output.collect(key, new IntWritable(count));
    }
}
Driver Code: Copy this program into the WCDriver Java class file.
// Importing libraries
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WCDriver extends Configured implements Tool {

    public int run(String args[]) throws IOException {
        if (args.length < 2) {
            System.out.println("Please give valid inputs");
            return -1;
        }
        JobConf conf = new JobConf(WCDriver.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        conf.setMapperClass(WCMapper.class);
        conf.setReducerClass(WCReducer.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        JobClient.runJob(conf);
        return 0;
    }

    // Main Method
    public static void main(String args[]) throws Exception {
        int exitCode = ToolRunner.run(new WCDriver(), args);
        System.exit(exitCode); // exit with the job's status code
    }
}
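A typical build-and-run sequence (assuming the three classes are compiled and packaged into a jar named wordcount.jar; adjust file and HDFS paths to your setup):

$ hadoop fs -mkdir -p /wordcount/input
$ hadoop fs -put input.txt /wordcount/input
$ hadoop jar wordcount.jar WCDriver /wordcount/input /wordcount/output
$ hadoop fs -cat /wordcount/output/part-00000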
References:
https://www.geeksforgeeks.org/data-engineering/how-to-execute-wordcount-program-in-mapreduce-using-cloudera-distribution-hadoop-cdh/
https://youtu.be/lB8G0a_LjqA?si=-JNvUYhnc1UT_ZsZ
https://youtu.be/knAS0w-jiUk?si=4yoY1cEq0-LKQvFb
https://youtu.be/Dp2-dAftD1Q?si=pWac3qtl5PzyM2Sf