Table of Contents

MapReduce Tutorial - Perl API

Hadoop::Runner

# Hadoop::Runner - Moose class whose attributes describe one job: its
# components, its input/output formats, and a few environment settings.
package Hadoop::Runner;
use Moose;
 
# Job components. Each value must consume the named role. Only 'mapper' is
# required; 'reducer', 'combiner' and 'partitioner' may be omitted.
# Note that 'combiner' is constrained to the same role as 'reducer'.
has 'mapper' => (does => 'Hadoop::Mapper', required => 1);
has 'reducer' => (does => 'Hadoop::Reducer');
has 'combiner' => (does => 'Hadoop::Reducer');
has 'partitioner' => (does => 'Hadoop::Partitioner');
 
# Input/output handling. The formats default to the text variants and output
# compression defaults to off (0).
# NOTE(review): the 'InputFormat' and 'OutputFormat' type constraints are not
# defined in this snippet -- confirm where they are declared.
has 'input_format' => (isa => 'InputFormat', default => 'TextInputFormat');
has 'output_format' => (isa => 'OutputFormat', default => 'TextOutputFormat');
has 'output_compression' => (isa => 'Bool', default => 0);
 
# Environment settings. 'hadoop_prefix' is a string defaulting to
# '/SGE/HADOOP/active' (presumably the Hadoop installation prefix -- TODO
# confirm). 'copy_environment' is an array of strings; its default is built by
# a sub, so every object gets its own fresh empty array reference.
has 'hadoop_prefix' => (isa => 'Str', default => '/SGE/HADOOP/active');
has 'copy_environment' => (isa => 'ArrayRef[Str]', default => sub { [] });
 
# Forward declaration only; the body of run() is not part of this snippet.
# NOTE(review): the empty prototype "()" has no effect when run() is invoked
# as a method.
sub run();

Command line arguments supported by Hadoop::Runner::run()

script.pl [-jt jobtracker | -c number_of_machines [-w secs]] [-r reducers] [-Dname=value -Dname=value ...] input output
script.pl --map number_of_reducers
script.pl --reduce
script.pl --combine

Hadoop::Mapper

# Hadoop::Mapper - role to be consumed by mapper classes.
#
# A consuming class must implement 'map'. The role supplies empty default
# implementations of 'setup' and 'cleanup', so a class only needs to define
# them when it has work to do there.
# NOTE(review): presumably setup/cleanup run before/after the map calls --
# confirm against the runner implementation.
package Hadoop::Mapper;
use Moose::Role;
 
requires 'map';
 
# No-op defaults. Declared without a prototype: prototypes are ignored on
# method calls, and this matches the other hooks in this API.
sub setup {}
sub cleanup {}

Hadoop::Reducer

# Hadoop::Reducer - role to be consumed by reducer (and combiner) classes.
#
# The package is named Hadoop::Reducer because that is the role name that
# Hadoop::Runner's 'reducer' and 'combiner' attributes check with
# does => 'Hadoop::Reducer'.
#
# A consuming class must implement 'reduce'. The role supplies empty default
# implementations of 'setup' and 'cleanup', so a class only needs to define
# them when it has work to do there.
# NOTE(review): presumably setup/cleanup run before/after the reduce calls --
# confirm against the runner implementation.
package Hadoop::Reducer;
use Moose::Role;
 
requires 'reduce';
 
# No-op defaults. Declared without a prototype: prototypes are ignored on
# method calls, and this matches the other hooks in this API.
sub setup {}
sub cleanup {}

Hadoop::Partitioner

# Hadoop::Partitioner - role to be consumed by partitioner classes.
#
# A consuming class must implement 'getPartition'. The 'setup' and 'cleanup'
# methods provided here do nothing and return nothing; a consuming class may
# define its own versions.
package Hadoop::Partitioner;
use Moose::Role;

requires qw(getPartition);

# Default hook: nothing to prepare.
sub setup {
    return;
}

# Default hook: nothing to tear down.
sub cleanup {
    return;
}

Available environment variables