Differences

This shows you the differences between two versions of the page.

--- spark:spark-introduction [2014/11/03 20:31]
straka
+++ spark:spark-introduction [2014/11/03 20:37]
straka
@@ Line 70: / Line 70: @@
     return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1]
-lines = sc.textFile("/net/projects/hadoop/examples/inputs/points-small/points.txt")
+lines = sc.textFile("/net/projects/hadoop/examples/inputs/points-small/points.txt", sc.defaultParallelism)
 data = lines.map(lambda line: np.array([float(x) for x in line.split()])).cache()
@@ Line 76: / Line 76: @@
 epsilon = 1e-3
-centers = data.takeSample(False, K   )    # Sample K random points
+centers = data.takeSample(False, K)       # Sample K random points
 for i in range(5):                        # Perform 5 iterations
     old_centers = sc.broadcast(centers)
@@ Line 89: / Line 89: @@
                .map(lambda (index, (sum, count)): sum / count)
                .collect())
     # If the change in center positions is less than epsilon, stop.
     centers_change = sum(np.sqrt(np.sum((a - b)**2)) for (a, b) in zip(centers, old_centers.value))
@@ Line 112: / Line 111: @@
   centers.map(center => (center-point).norm(2)).zipWithIndex.min._2
-val lines = sc.textFile("/net/projects/hadoop/examples/inputs/points-small/points.txt")
+val lines = sc.textFile("/net/projects/hadoop/examples/inputs/points-small/points.txt", sc.defaultParallelism)
 val data = lines.map(line => Vector(line.split("\\s+").map(_.toDouble))).cache()

[ Back to the navigation ] [ Back to the content ]

Institute of Formal and Applied Linguistics Wiki

Differences