# A MapReduce job that generates an inverted index.
# The inverted index contains, for each word, all of its occurrences, where
# each occurrence is a pair (article of occurrence, position of occurrence).
#
# Usage:
#   rm -rf step-5-out-sol2; perl step-5-solution2.pl /home/straka/wiki/cs-text-medium/ step-5-out-sol2
#   less step-5-out-sol2/part-*

use strict;
use warnings;

package My::Mapper;
use Moose;
with 'Hadoop::Mapper';

# Emit one (word, "key:position") record for every word found in $value.
#
#   $key     - record key; used verbatim as the first half of the emitted
#              occurrence (presumably the article name -- confirm against the
#              input data)
#   $value   - text to be split into words
#   $context - output collector; only its write($key, $value) method is used
#
# Positions are 1-based and count only non-empty tokens.
sub map {
  my ($self, $key, $value, $context) = @_;

  # NOTE(review): what \W treats as a word character depends on whether
  # $value arrives as a decoded character string -- confirm for non-ASCII text.
  my $position = 1;
  foreach my $word (split /\W/, $value) {
    # Adjacent (or leading) separators produce empty fields; they are not
    # words and must not consume a position number.
    next if not length $word;

    $context->write($word, "$key:$position");
    $position++;
  }
}

package My::Reducer;
use Moose;
with 'Hadoop::Reducer';

# Concatenate all occurrences of one word into a single output record.
#
#   $key     - the word
#   $values  - iterator over the word's occurrences; advanced with next()
#              and read with value()
#   $context - output collector; only its write($key, $value) method is used
#
# Writes ($key, "occ1 occ2 ...") with the occurrences separated by single
# spaces, in the order the iterator yields them.
sub reduce {
  my ($self, $key, $values, $context) = @_;

  my @occurrences;
  while ($values->next) {
    push @occurrences, $values->value;
  }

  $context->write($key, join(" ", @occurrences));
}

package main;
use Hadoop::Runner;

# Wire the mapper and reducer together and start the job.
my $runner = Hadoop::Runner->new(
  mapper => My::Mapper->new(),
  reducer => My::Reducer->new(),
  input_format => 'KeyValueTextInputFormat');

$runner->run();