| Both sides previous revision
Previous revision
Next revision
|
Previous revision
|
courses:mapreduce-tutorial:step-31 [2012/02/06 09:01] straka |
courses:mapreduce-tutorial:step-31 [2012/02/06 14:52] (current) dusek |
| |
| ===== Example ===== | ===== Example ===== |
| This example reads the keys of ''/net/projects/hadoop/examples/inputs/numbers-small'', computes the sum of all the keys and print it: | This example reads the keys of ''/net/projects/hadoop/examples/inputs/numbers-small'', computes the sum of all the keys and prints it: |
| <code java Sum.java> | <code java Sum.java> |
| | import java.io.IOException; |
| | |
| | import org.apache.hadoop.conf.*; |
| | import org.apache.hadoop.fs.Path; |
| | import org.apache.hadoop.io.*; |
| import org.apache.hadoop.mapreduce.*; | import org.apache.hadoop.mapreduce.*; |
| import org.apache.hadoop.mapreduce.lib.allreduce.*; | import org.apache.hadoop.mapreduce.lib.allreduce.*; |
| You can run the example locally using: | You can run the example locally using: |
| wget --no-check-certificate 'https://wiki.ufal.ms.mff.cuni.cz/_export/code/courses:mapreduce-tutorial:step-31?codeblock=0' -O Sum.java | wget --no-check-certificate 'https://wiki.ufal.ms.mff.cuni.cz/_export/code/courses:mapreduce-tutorial:step-31?codeblock=0' -O Sum.java |
| make -f /net/projects/hadoop/java/Makefile Sum.java | make -f /net/projects/hadoop/java/Makefile Sum.jar |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop Sum.jar /net/projects/hadoop/examples/inputs/numbers-small step-31-out | rm -rf step-31-out; /net/projects/hadoop/bin/hadoop Sum.jar /net/projects/hadoop/examples/inputs/numbers-small step-31-out |
| less step-31-out/part-* | less step-31-out/part-* |
| | |
| To run on a cluster with //C// machines using //C// mappers: | To run on a cluster using a specified number of machines: |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop Sum.jar -c C `/net/projects/hadoop/bin/compute-splitsize /net/projects/hadoop/examples/inputs/numbers-small C` /net/projects/hadoop/examples/inputs/numbers-small step-31-out | rm -rf step-31-out; M=#of_machines; INPUT=/net/projects/hadoop/examples/inputs/numbers-small; /net/projects/hadoop/bin/hadoop Sum.jar -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out |
| less step-31-out/part-* | less step-31-out/part-* |
| |
| # NOW VIEW THE FILE | # NOW VIEW THE FILE |
| # $EDITOR Statistics.java | # $EDITOR Statistics.java |
| make -f /net/projects/hadoop/java/Makefile Statistics.java | make -f /net/projects/hadoop/java/Makefile Statistics.jar |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop Statistics.jar -c C `/net/projects/hadoop/bin/compute-splitsize /net/projects/hadoop/examples/inputs/numbers-small C` /net/projects/hadoop/examples/inputs/numbers-small step-31-out | rm -rf step-31-out; M=#of_machines; INPUT=/net/projects/hadoop/examples/inputs/numbers-small; /net/projects/hadoop/bin/hadoop Statistics.jar -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out |
| less step-31-out/part-* | less step-31-out/part-* |
| |
| # NOW VIEW THE FILE | # NOW VIEW THE FILE |
| # $EDITOR Median.java | # $EDITOR Median.java |
| make -f /net/projects/hadoop/java/Makefile Median.java | make -f /net/projects/hadoop/java/Makefile Median.jar |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop Median.jar -c C `/net/projects/hadoop/bin/compute-splitsize /net/projects/hadoop/examples/inputs/numbers-small C` /net/projects/hadoop/examples/inputs/numbers-small step-31-out | rm -rf step-31-out; M=#of_machines; INPUT=/net/projects/hadoop/examples/inputs/numbers-small; /net/projects/hadoop/bin/hadoop Median.jar -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out |
| less step-31-out/part-* | less step-31-out/part-* |
| |
| * ''clusters.file'' -- file to read the initial clusters from | * ''clusters.file'' -- file to read the initial clusters from |
| You can download and compile it using: | You can download and compile it using: |
| wget --no-check-certificate 'https://wiki.ufal.ms.mff.cuni.cz/_media/courses:mapreduce-tutorial:step-31-exercise3.txt' -O KMeans.java.java | wget --no-check-certificate 'https://wiki.ufal.ms.mff.cuni.cz/_media/courses:mapreduce-tutorial:step-31-exercise3.txt' -O KMeans.java |
| # NOW VIEW THE FILE | # NOW VIEW THE FILE |
| # $EDITOR KMeans.java.java | # $EDITOR KMeans.java |
| make -f /net/projects/hadoop/java/Makefile KMeans.java.java | make -f /net/projects/hadoop/java/Makefile KMeans.jar |
| You can run it using a specified number of machines on the following input data: | You can run it using a specified number of machines on the following input data: |
| * ''/net/projects/hadoop/examples/inputs/points-small'': | * ''/net/projects/hadoop/examples/inputs/points-small'': |
| <code>M=machines; K=50; INPUT=/net/projects/hadoop/examples/inputs/points-small/points.txt | <code>M=#of_machines; K=50; INPUT=/net/projects/hadoop/examples/inputs/points-small/points.txt |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.java.jar -Dclusters.num=$K -Dclusters.file=$INPUT [-jt jobtracker | -c $M] `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> | rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.jar -Dclusters.num=$K -Dclusters.file=$INPUT -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> |
| * ''/net/projects/hadoop/examples/inputs/points-medium'': | * ''/net/projects/hadoop/examples/inputs/points-medium'': |
| <code>M=machines; K=100; INPUT=/net/projects/hadoop/examples/inputs/points-medium/points.txt | <code>M=#of_machines; K=100; INPUT=/net/projects/hadoop/examples/inputs/points-medium/points.txt |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.java.jar -Dclusters.num=$K -Dclusters.file=$INPUT [-jt jobtracker | -c $M] `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> | rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.jar -Dclusters.num=$K -Dclusters.file=$INPUT -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> |
| * ''/net/projects/hadoop/examples/inputs/points-large'': | * ''/net/projects/hadoop/examples/inputs/points-large'': |
| <code>M=machines; K=200; INPUT=/net/projects/hadoop/examples/inputs/points-large/points.txt | <code>M=#of_machines; K=200; INPUT=/net/projects/hadoop/examples/inputs/points-large/points.txt |
| rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.java.jar -Dclusters.num=$K -Dclusters.file=$INPUT [-jt jobtracker | -c $M] `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> | rm -rf step-31-out; /net/projects/hadoop/bin/hadoop KMeans.jar -Dclusters.num=$K -Dclusters.file=$INPUT -c $M `/net/projects/hadoop/bin/compute-splitsize $INPUT $M` $INPUT step-31-out</code> |
| |
| Solution: {{:courses:mapreduce-tutorial:step-31-solution3.txt|KMeans.java}}. | Solution: {{:courses:mapreduce-tutorial:step-31-solution3.txt|KMeans.java}}. |
| |