Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | Next revision Both sides next revision | ||
spark:spark-introduction [2022/12/14 12:36] straka [Word Count Example] |
spark:spark-introduction [2022/12/14 12:42] straka [K-Means Example] |
||
---|---|---|---|
Line 63: | Line 63: | ||
===== K-Means Example ===== | ===== K-Means Example ===== | ||
- | An example implementing [[http:// | + | An example implementing [[http:// |
<file python> | <file python> | ||
import numpy as np | import numpy as np | ||
Line 70: | Line 70: | ||
return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | return min((np.sum((point - centers[i]) ** 2), i) for i in range(len(centers)))[1] | ||
- | lines = sc.textFile("/ | + | lines = sc.textFile("/ |
data = lines.map(lambda line: np.array(map(float, | data = lines.map(lambda line: np.array(map(float, | ||
- | K = 50 | + | K = 100 |
epsilon = 1e-3 | epsilon = 1e-3 | ||
Line 111: | Line 111: | ||
centers.map(center => (center-point).norm(2)).zipWithIndex.min._2 | centers.map(center => (center-point).norm(2)).zipWithIndex.min._2 | ||
- | val lines = sc.textFile("/ | + | val lines = sc.textFile("/ |
val data = lines.map(line => Vector(line.split(" | val data = lines.map(line => Vector(line.split(" | ||
- | val K = 50 | + | val K = 100 |
val epsilon = 1e-3 | val epsilon = 1e-3 | ||