# A MapReduce job that counts the occurrences of every word in the article
# texts.
#
# Example invocation:
#   rm -rf step-5-out-sol1; perl step-5-solution1.pl /home/straka/wiki/cs-text-medium/ step-5-out-sol1
#   less step-5-out-sol1/part-*

# File-scoped strictures: Moose enables these only inside the packages that
# `use Moose`, so without them the code in package main would run unchecked.
use strict;
use warnings;

# Mapper: emits a (word, 1) pair for every word found in the input value.
package My::Mapper;
use Moose;
with 'Hadoop::Mapper';

# map($key, $value, $context)
#   $key     - input record key (not used here).
#   $value   - input record value; the text that is split into words.
#   $context - output collector; receives one write($word, 1) per word.
sub map {
    my ($self, $key, $value, $context) = @_;

    # Splitting on runs of non-word characters avoids producing an empty
    # field between every pair of adjacent separators.
    foreach my $word (split /\W+/, $value) {
        # split still yields a leading empty field when the text starts
        # with a separator, so empty strings must be skipped.
        next unless length $word;
        $context->write($word, 1);
    }

    return;
}

# Reducer: sums all values received for one key and emits (key, sum).
package My::Reducer;
use Moose;
with 'Hadoop::Reducer';

# reduce($key, $values, $context)
#   $key     - the key shared by all values in this group (a word, as
#              emitted by My::Mapper).
#   $values  - iterator over the group's values, advanced with ->next and
#              read with ->value.
#   $context - output collector; receives a single write($key, $sum).
sub reduce {
    my ($self, $key, $values, $context) = @_;

    my $sum = 0;
    while ($values->next) {
        $sum += $values->value;
    }

    $context->write($key, $sum);

    return;
}

package main;
use Hadoop::Runner;

# NOTE(review): the input and output paths shown in the example invocation
# are not passed explicitly here; presumably Hadoop::Runner reads them from
# @ARGV -- confirm against the Hadoop::Runner documentation.
my $runner = Hadoop::Runner->new(
    mapper       => My::Mapper->new(),
    reducer      => My::Reducer->new(),
    input_format => 'KeyValueTextInputFormat',
);

$runner->run();