Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
courses:mapreduce-tutorial:step-24 [2012/01/27 20:40] straka |
courses:mapreduce-tutorial:step-24 [2012/01/27 21:41] straka |
||
---|---|---|---|
Line 1: | Line 1: | ||
====== MapReduce Tutorial : Mappers, running Java Hadoop jobs ====== | ====== MapReduce Tutorial : Mappers, running Java Hadoop jobs ====== | ||
- | We start by exploring | + | We start by going through |
+ | |||
+ | A mapper which processes (key, value) pairs of types (Kin, Vin) and produces (key, value) pairs of types (Kout, Vout) must be a subclass of [[http://hadoop.apache.org/common/docs/r1.0.0/api/org/apache/hadoop/mapreduce/Mapper.html|Mapper<Kin, Vin, Kout, Vout>]]. | ||
+ | |||
+ | |||
+ | http:// | ||
+ | |||
+ | The Mapper outputs only the (key, value) pairs whose key starts with a given letter ('A' in the example below; the comparison is case-insensitive). | ||
+ | |||
+ | <file java MapperOnlyHadoopJob.java> | ||
+ | import java.io.IOException; | ||
+ | |||
+ | import org.apache.hadoop.conf.*; | ||
+ | import org.apache.hadoop.fs.Path; | ||
+ | import org.apache.hadoop.io.*; | ||
+ | import org.apache.hadoop.mapreduce.*; | ||
+ | import org.apache.hadoop.mapreduce.lib.input.*; | ||
+ | import org.apache.hadoop.mapreduce.lib.output.*; | ||
+ | import org.apache.hadoop.util.*; | ||
+ | |||
+ | public class MapperOnlyHadoopJob extends Configured implements Tool { | ||
+ | // Mapper | ||
+ | public static class TheMapper extends Mapper< | ||
+ | public void setup(Context context) { | ||
+ | } | ||
+ | |||
+ | public void map(Text key, Text value, Context context) throws IOException, | ||
+ | if (key.getLength() > 0 && Character.toUpperCase(key.charAt(0)) == ' | ||
+ | context.write(key, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | public void cleanup(Context context) { | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // Job configuration | ||
+ | public int run(String[] args) throws Exception { | ||
+ | if (args.length < 2) { | ||
+ | System.err.printf(" | ||
+ | return 1; | ||
+ | } | ||
+ | |||
+ | Job job = new Job(getConf(), | ||
+ | |||
+ | job.setJarByClass(this.getClass()); | ||
+ | job.setMapperClass(TheMapper.class); | ||
+ | job.setOutputKeyClass(Text.class); | ||
+ | job.setOutputValueClass(Text.class); | ||
+ | |||
+ | job.setInputFormatClass(KeyValueTextInputFormat.class); | ||
+ | |||
+ | FileInputFormat.addInputPath(job, | ||
+ | FileOutputFormat.setOutputPath(job, | ||
+ | |||
+ | return job.waitForCompletion(true) ? 0 : 1; | ||
+ | } | ||
+ | |||
+ | // Main method | ||
+ | public static void main(String[] args) throws Exception { | ||
+ | int res = ToolRunner.run(new MapperOnlyHadoopJob(), | ||
+ | |||
+ | System.exit(res); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ===== Running the job ===== | ||
+ | Download the source and compile it. | ||
+ | |||
+ | The official way of running Hadoop jobs is to use the ''/net/projects/hadoop/bin/hadoop'' script. | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||