Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
spark:spark-introduction [2014/11/03 20:31] straka |
spark:spark-introduction [2014/11/03 20:37] straka |
||
---|---|---|---|
Line 70: | Line 70: | ||
return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | ||
- | lines = sc.textFile("/ | + | lines = sc.textFile("/ |
data = lines.map(lambda line: np.array([float(x) for x in line.split()])).cache() | data = lines.map(lambda line: np.array([float(x) for x in line.split()])).cache() | ||
Line 76: | Line 76: | ||
epsilon = 1e-3 | epsilon = 1e-3 | ||
- | centers = data.takeSample(False, | + | centers = data.takeSample(False, |
for i in range(5): | for i in range(5): | ||
old_centers = sc.broadcast(centers) | old_centers = sc.broadcast(centers) | ||
Line 89: | Line 89: | ||
| | ||
| | ||
- | |||
# If the change in center positions is less than epsilon, stop. | # If the change in center positions is less than epsilon, stop. | ||
centers_change = sum(np.sqrt(np.sum((a - b)**2)) for (a, b) in zip(centers, | centers_change = sum(np.sqrt(np.sum((a - b)**2)) for (a, b) in zip(centers, | ||
Line 112: | Line 111: | ||
centers.map(center => (center-point).norm(2)).zipWithIndex.min._2 | centers.map(center => (center-point).norm(2)).zipWithIndex.min._2 | ||
- | val lines = sc.textFile("/ | + | val lines = sc.textFile("/ |
val data = lines.map(line => Vector(line.split(" | val data = lines.map(line => Vector(line.split(" | ||