Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
spark:spark-introduction [2022/12/14 12:29] straka [Running Spark Shell in Python] |
spark:spark-introduction [2022/12/14 13:28] (current) straka [Running Spark Shell in Python] |
||
|---|---|---|---|
| Line 5: | Line 5: | ||
| ===== Running Spark Shell in Python ===== | ===== Running Spark Shell in Python ===== | ||
| - | To run an interactive Python shell in local Spark mode, run (on your local workstation or on the cluster using '' | + | To run an interactive Python shell in local Spark mode, run (on your local workstation or on the cluster using '' |
| - | PYSPARK_DRIVER_PYTHON=ipython3 pyspark | + | |
| The PYSPARK_DRIVER_PYTHON=ipython3 parameter instructs Spark to use '' | The PYSPARK_DRIVER_PYTHON=ipython3 parameter instructs Spark to use '' | ||
| Line 29: | Line 29: | ||
| wiki = sc.textFile("/ | wiki = sc.textFile("/ | ||
| words = wiki.flatMap(lambda line: line.split()) | words = wiki.flatMap(lambda line: line.split()) | ||
| - | counts = words.map(lambda word: (word, 1)).reduceByKey(lambda c1,c2: c1+c2) | + | counts = words.map(lambda word: (word, 1)).reduceByKey(lambda c1, c2: c1+c2) |
| - | sorted = counts.sortBy(lambda | + | sorted = counts.sortBy(lambda |
| - | sorted.saveAsTextFile('output') | + | sorted.saveAsTextFile("output") |
| # Alternatively, | # Alternatively, | ||
| Line 37: | Line 37: | ||
| | | ||
| | | ||
| - | | + | |
| - | | + | |
| | | ||
| </ | </ | ||
| The output of ' | The output of ' | ||
| - | Note that 'map' and ' | + | Note that '' |
| The Scala version is quite similar: | The Scala version is quite similar: | ||
| Line 49: | Line 49: | ||
| val wiki = sc.textFile("/ | val wiki = sc.textFile("/ | ||
| val words = wiki.flatMap(line => line.split(" | val words = wiki.flatMap(line => line.split(" | ||
| - | val counts = words.map(word => (word, | + | val counts = words.map(word => (word, 1)).reduceByKey((c1, |
| val sorted = counts.sortBy({case (word, count) => count}, ascending=false) | val sorted = counts.sortBy({case (word, count) => count}, ascending=false) | ||
| sorted.saveAsTextFile(" | sorted.saveAsTextFile(" | ||
| Line 63: | Line 63: | ||
| ===== K-Means Example ===== | ===== K-Means Example ===== | ||
| - | An example implementing [[http:// | + | An example implementing [[http:// |
| <file python> | <file python> | ||
| import numpy as np | import numpy as np | ||
| Line 73: | Line 73: | ||
| data = lines.map(lambda line: np.array(map(float, | data = lines.map(lambda line: np.array(map(float, | ||
| - | K = 50 | + | K = 100 |
| epsilon = 1e-3 | epsilon = 1e-3 | ||
| Line 114: | Line 114: | ||
| val data = lines.map(line => Vector(line.split(" | val data = lines.map(line => Vector(line.split(" | ||
| - | val K = 50 | + | val K = 100 |
| val epsilon = 1e-3 | val epsilon = 1e-3 | ||
