# A MapReduce job that creates an ascending list of unique article names and,
# at the same time, an ascending list of unique words.
#
# Usage:
#   rm -rf step-8-out-sol; perl step-8-solution.pl -c 2 -r 2 /home/straka/wiki/cs-text-medium/ step-8-out-sol
#   less step-8-out-sol/part-*
#
# How it works: the mapper tags every emitted key with 0 (article name) or
# 1 (word), and the partitioner routes records by that tag, so with two or
# more reducers partition 0 receives only article names and partition 1 only
# words.  With a single reducer both kinds of keys end up in the same output.
# NOTE(review): the "ascending" and "unique" properties presumably rely on the
# framework handing each reducer its keys in sorted order, one reduce() call
# per distinct key -- confirm against the Hadoop Perl API.

use strict;
use warnings;

package My::Mapper;
use Moose;
with 'Hadoop::Mapper';

# Emits the record key tagged with 0 and every non-empty word of the record
# value tagged with 1.
#
#   $key     - record key (the article name, according to the file header)
#   $value   - record value; split into words on non-word characters
#   $context - output collector; only its write() method is used
sub map {
    my ($self, $key, $value, $context) = @_;

    # Tag 0 marks an article name.
    $context->write($key, 0);

    # Splitting on a single \W yields empty fields between adjacent
    # separators; those are skipped so that only real words are emitted.
    foreach my $word (split /\W/, $value) {
        next if not length $word;

        # Tag 1 marks a word.
        $context->write($word, 1);
    }

    return;
}

package My::Partitioner;
use Moose;
with 'Hadoop::Partitioner';

# Chooses the partition from the tag written by the mapper: 0 for article
# names, 1 for words.
#
#   $key        - record key (unused)
#   $value      - tag written by My::Mapper (0 or 1)
#   $partitions - number of partitions available
#
# Returns the tag itself when at least two partitions exist.  Otherwise it
# returns 0, so the result never exceeds the available partition range when
# the job is started with a single reducer.
sub getPartition {
    my ($self, $key, $value, $partitions) = @_;

    # Guard against a missing/zero/one partition count before the modulo
    # (a zero modulus would die).
    return 0 if !$partitions || $partitions < 2;

    return $value % $partitions;
}

package My::Reducer;
use Moose;
with 'Hadoop::Reducer';

# Writes each key once and ignores its values, which turns the grouped input
# into a list of keys.
#
#   $key     - the key being reduced
#   $values  - values collected for the key (unused)
#   $context - output collector; only its write() method is used
sub reduce {
    my ($self, $key, $values, $context) = @_;

    $context->write($key);

    return;
}

package main;
use Hadoop::Runner;

# Wire the three components together.  The input is read with
# KeyValueTextInputFormat, i.e. as key/value text records.
my $runner = Hadoop::Runner->new(
    mapper       => My::Mapper->new(),
    partitioner  => My::Partitioner->new(),
    reducer      => My::Reducer->new(),
    input_format => 'KeyValueTextInputFormat',
);

$runner->run();