Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
courses:mapreduce-tutorial:step-29 [2012/01/29 16:27] straka |
courses:mapreduce-tutorial:step-29 [2012/02/05 19:14] (current) straka |
||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== MapReduce Tutorial : Custom | + | ====== MapReduce Tutorial : Custom |
| - | WholeFile | + | ====== Custom sorting comparator ====== |
| - | FileAsPath | + | |
| - | ParagraphFile | + | The keys are sorted before being processed by a reducer, using a |
| + | [[http:// | ||
| + | |||
| + | <code java> | ||
| + | public static class IntPair implements WritableComparable< | ||
| + | private int first = 0; | ||
| + | private int second = 0; | ||
| + | |||
| + | public void set(int left, int right) { first = left; second = right; } | ||
| + | public int getFirst() { return first; } | ||
| + | public int getSecond() { return second; } | ||
| + | |||
| + | public void readFields(DataInput in) throws IOException { | ||
| + | first = in.readInt(); | ||
| + | second = in.readInt(); | ||
| + | } | ||
| + | public void write(DataOutput out) throws IOException { | ||
| + | out.writeInt(first); | ||
| + | out.writeInt(second); | ||
| + | } | ||
| + | |||
| + | public int compareTo(IntPair o) { | ||
| + | if (first != o.first) return first < o.first ? -1 : 1; | ||
| + | else return second < o.second ? -1 : second == o.second ? 0 : 1; | ||
| + | } | ||
| + | } | ||
| + | </code> | ||
| + | |||
| + | If, in a Hadoop job, we would like to sort the '' | ||
| + | |||
| + | <code java> | ||
| + | public static class IntPair implements WritableComparable< | ||
| + | ... | ||
| + | public static class FirstOnlyComparator implements RawComparator< | ||
| + | public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { | ||
| + | int first1 = WritableComparator.readInt(b1, | ||
| + | int first2 = WritableComparator.readInt(b2, | ||
| + | return first1 < first2 ? -1 : first1 == first2 ? 0 : 1; | ||
| + | } | ||
| + | public int compare(IntPair x, IntPair y) { | ||
| + | return x.getFirst() < y.getFirst() ? -1 : x.getFirst() == y.getFirst() ? 0 : 1; | ||
| + | } | ||
| + | } | ||
| + | } | ||
| + | |||
| + | ... | ||
| + | |||
| + | job.setSortComparatorClass(IntPair.FirstOnlyComparator.class); | ||
| + | </code> | ||
| + | Notice that we used the helper function '' | ||
| + | |||
| + | ====== Grouping comparator ====== | ||
| + | |||
| + | In a reducer, it is guaranteed that keys are processed in ascending order. Sometimes it would be useful if the //values associated with one key// could also be processed in ascending order. | ||
| + | |||
| + | That is possible only to some degree. The (key, value) pairs are compared //using the key only//. After the (key, value) pairs are sorted, the (key, value) pairs with the same key are grouped together. This grouping can be performed using a custom '' | ||
| + | |||
| + | As an example, consider that the input consists of ('' | ||
| + | - The mapper produces ('' | ||
| + | - These pairs are sorted by the '' | ||
| + | - The custom grouping comparator is used, which groups the '' | ||
| + | <code java> | ||
| + | public static class IntPair implements WritableComparable< | ||
| + | ... | ||
| + | public static class FirstOnlyComparator implements RawComparator< | ||
| + | public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { | ||
| + | int first1 = WritableComparator.readInt(b1, | ||
| + | int first2 = WritableComparator.readInt(b2, | ||
| + | return first1 < first2 ? -1 : first1 == first2 ? 0 : 1; | ||
| + | } | ||
| + | public int compare(IntPair x, IntPair y) { | ||
| + | return x.getFirst() < y.getFirst() ? -1 : x.getFirst() == y.getFirst() ? 0 : 1; | ||
| + | } | ||
| + | } | ||
| + | } | ||
| + | |||
| + | ... | ||
| + | job.setGroupingComparatorClass(IntPair.FirstOnlyComparator.class); | ||
| + | </code> | ||
| + | |||
| + | ====== Exercise ====== | ||
| + | |||
| + | Improve the [[.: | ||
| + | |||
| + | Use the same approach as with the '' | ||
| + | |||
| + | ---- | ||
| + | |||
| + | < | ||
| + | <table style=" | ||
| + | < | ||
| + | <td style=" | ||
| + | <td style=" | ||
| + | <td style=" | ||
| + | </ | ||
| + | </ | ||
| + | </ | ||
