# A MapReduce job that counts the occurrences of every word in the article texts.
#
# rm -rf step-5-out-sol1; perl step-5-solution1.pl /home/straka/wiki/cs-text-medium/ step-5-out-sol1
# less step-5-out-sol1/part-*

package My::Mapper;
use Moose;
with 'Hadoop::Mapper';

# Mapper callback: emits one (word, 1) pair per word occurrence in $value.
#
# Parameters:
#   $self    - the mapper instance (not used by this method)
#   $key     - key of the input record (not used by this method)
#   $value   - text of the input record; it is broken into words
#   $context - output collector; pairs are emitted via $context->write
#
# A word is a maximal run of \w characters; every other character acts as
# a separator, so no empty words are ever emitted.
sub map {
  my ($self, $key, $value, $context) = @_;

  # A list-context /g match returns every run of word characters in order.
  my @tokens = $value =~ /\w+/g;

  foreach my $token (@tokens) {
    $context->write($token, 1);
  }
}

package My::Reducer;
use Moose;
with 'Hadoop::Reducer';

# Reducer callback: adds up all values seen for $key and emits the total.
#
# Parameters:
#   $self    - the reducer instance (not used by this method)
#   $key     - the key whose values are being combined
#   $values  - iterator over the values: ->next advances and reports whether
#              a value is available, ->value returns the current one
#   $context - output collector; the (key, total) pair is emitted via
#              $context->write
#
# When the iterator yields no values, a total of 0 is emitted.
sub reduce {
  my ($self, $key, $values, $context) = @_;

  my $total = 0;
  # The postfix condition is tested before each addition, so ->value is
  # only read after ->next has returned true.
  $total += $values->value while $values->next;

  $context->write($key, $total);
}

package main;
use Hadoop::Runner;

# Build the job from one mapper and one reducer instance, then run it.
# NOTE(review): this script never reads @ARGV itself; presumably
# Hadoop::Runner takes the input and output paths shown in the usage comment
# at the top of the file from the command line - confirm against its docs.
my $mapper = My::Mapper->new();
my $reducer = My::Reducer->new();

my $runner = Hadoop::Runner->new(
  mapper => $mapper,
  reducer => $reducer,
  input_format => 'KeyValueTextInputFormat',
);

$runner->run();