Differences
This shows you the differences between two versions of the page.
Both sides previous revision | Previous revision | Next revision | Both sides next revision | ||
spark:spark-introduction [2014/11/03 20:31] straka |
spark:spark-introduction [2014/11/03 20:35] straka |
||
---|---|---|---|
Line 70: | Line 70: | ||
return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | ||
- | lines = sc.textFile("/ | + | lines = sc.textFile("/ |
data = lines.map(lambda line: np.array([float(x) for x in line.split()])).cache() | data = lines.map(lambda line: np.array([float(x) for x in line.split()])).cache() | ||
Line 76: | Line 76: | ||
epsilon = 1e-3 | epsilon = 1e-3 | ||
- | centers = data.takeSample(False, | + | centers = data.takeSample(False, |
for i in range(5): | for i in range(5): | ||
old_centers = sc.broadcast(centers) | old_centers = sc.broadcast(centers) | ||
Line 89: | Line 89: | ||
| | ||
| | ||
- | |||
# If the change in center positions is less than epsilon, stop. | # If the change in center positions is less than epsilon, stop. | ||
centers_change = sum(np.sqrt(np.sum((a - b)**2)) for (a, b) in zip(centers, | centers_change = sum(np.sqrt(np.sum((a - b)**2)) for (a, b) in zip(centers, |