Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
courses:mapreduce-tutorial:step-29 [2012/01/29 17:23] straka |
courses:mapreduce-tutorial:step-29 [2012/02/05 18:49] straka |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== MapReduce Tutorial : Custom | + | ====== MapReduce Tutorial : Custom |
- | Every custom format reading keys of type '' | + | ====== Fast sorting comparator ====== |
- | ===== WholeFileInputFormat ===== | + | The keys are sorted before being processed by a reducer, using a |
+ | [[http:// | ||
- | We start by creating '' | + | <code java> |
+ | public static class IntPair implements WritableComparable< | ||
+ | private int first = 0; | ||
+ | private int second = 0; | ||
- | The main functionality lays in '' | + | public void set(int left, int right) { first = left; second = right; } |
+ | public int getFirst() { return first; } | ||
+ | public int getSecond() { return second; } | ||
+ | |||
+ | public void readFields(DataInput | ||
+ | first = in.readInt(); | ||
+ | second = in.readInt(); | ||
+ | } | ||
+ | public void write(DataOutput out) throws IOException { | ||
+ | out.writeInt(first); | ||
+ | out.writeInt(second); | ||
+ | } | ||
+ | |||
+ | public int compareTo(IntPair o) { | ||
+ | if (first != o.first) return first < o.first ? -1 : 1; | ||
+ | else return second < o.second ? -1 : second == o.second ? 0 : 1; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | If, in a Hadoop job, we would like to sort the '' | ||
+ | |||
+ | |||
+ | |||
+ | ====== Grouping comparator ====== | ||
+ | |||
+ | In a reducer, it is guaranteed that keys are processed in ascending order. Sometimes it would be useful if the //values associated with one key// could also be processed in ascending order. | ||
+ | |||
+ | ---- | ||
+ | |||
+ | < | ||
+ | <table style=" | ||
+ | < | ||
+ | <td style=" | ||
+ | <td style=" | ||
+ | <td style=" | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||