Differences
This shows you the differences between two versions of the page.
Both sides previous revision | Previous revision | Next revision — Previous revision | Next revision | Both sides next revision
courses:mapreduce-tutorial:step-24 [2012/01/27 20:56] straka |
courses:mapreduce-tutorial:step-24 [2012/01/27 21:41] straka |
||
---|---|---|---|
Line 1: | Line 1: | ||
====== MapReduce Tutorial : Mappers, running Java Hadoop jobs ====== | ====== MapReduce Tutorial : Mappers, running Java Hadoop jobs ====== | ||
- | We start by exploring | + | We start by going through |
+ | |||
+ | A mapper which processes (key, value) pairs of types (Kin, Vin) and produces (key, value) pairs of types (Kout, Vout) must be a subclass of [[http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapreduce/Mapper.html|Mapper<Kin, Vin, Kout, Vout>]]. | ||
+ | |||
+ | |||
+ | http:// | ||
+ | |||
+ | The Mapper outputs only keys starting with '' | ||
<file java MapperOnlyHadoopJob.java> | <file java MapperOnlyHadoopJob.java> | ||
import java.io.IOException; | import java.io.IOException; | ||
Line 18: | Line 26: | ||
public void setup(Context context) { | public void setup(Context context) { | ||
} | } | ||
- | | + | |
public void map(Text key, Text value, Context context) throws IOException, | public void map(Text key, Text value, Context context) throws IOException, | ||
if (key.getLength() > 0 && Character.toUpperCase(key.charAt(0)) == ' | if (key.getLength() > 0 && Character.toUpperCase(key.charAt(0)) == ' | ||
Line 26: | Line 34: | ||
public void cleanup(Context context) { | public void cleanup(Context context) { | ||
- | } | + | } |
} | } | ||
- | | + | |
// Job configuration | // Job configuration | ||
public int run(String[] args) throws Exception { | public int run(String[] args) throws Exception { | ||
Line 35: | Line 43: | ||
return 1; | return 1; | ||
} | } | ||
- | | + | |
Job job = new Job(getConf(), | Job job = new Job(getConf(), | ||
- | | + | |
job.setJarByClass(this.getClass()); | job.setJarByClass(this.getClass()); | ||
job.setMapperClass(TheMapper.class); | job.setMapperClass(TheMapper.class); | ||
job.setOutputKeyClass(Text.class); | job.setOutputKeyClass(Text.class); | ||
job.setOutputValueClass(Text.class); | job.setOutputValueClass(Text.class); | ||
- | | + | |
job.setInputFormatClass(KeyValueTextInputFormat.class); | job.setInputFormatClass(KeyValueTextInputFormat.class); | ||
- | | + | |
FileInputFormat.addInputPath(job, | FileInputFormat.addInputPath(job, | ||
FileOutputFormat.setOutputPath(job, | FileOutputFormat.setOutputPath(job, | ||
- | | + | |
return job.waitForCompletion(true) ? 0 : 1; | return job.waitForCompletion(true) ? 0 : 1; | ||
} | } | ||
Line 57: | Line 65: | ||
System.exit(res); | System.exit(res); | ||
} | } | ||
- | } | + | } |
</ | </ | ||
+ | |||
+ | ===== Running the job ===== | ||
+ | Download the source and compile it. | ||
+ | |||
+ | The official way of running Hadoop jobs is to use the ''hadoop'' script (located in the Hadoop installation's ''bin'' directory). | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ |