  * run interactive shell using an existing Spark cluster (i.e., inside ''spark-srun'', or when ''MASTER'' is already set):
<code>spark-shell</code>
  * run interactive shell with local Spark cluster using one thread:
<code>MASTER=local spark-shell</code>
  * start Spark cluster (10 machines, 2GB RAM each) on Slurm and run the interactive shell inside it:
<code>spark-srun 10 2G spark-shell</code>
===== Running Scala Spark Applications =====

A compiled Scala Spark program (JAR) can be started using:
<code>spark-submit</code>

As described in [[running-spark-on-single-machine-or-on-cluster|Running Spark on Single Machine or on Cluster]], the environment variable ''MASTER'' specifies which Spark master to use (or whether a local one should be started).
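
In practice this means the application should not hard-code a master URL; creating the ''SparkContext'' without one lets ''spark-submit'' (and ''MASTER'') supply it. A minimal sketch (the object and application names are arbitrary):
<code scala>
import org.apache.spark.{SparkConf, SparkContext}

object MasterInfo {
  def main(args: Array[String]): Unit = {
    // No setMaster call here: the master URL is supplied by spark-submit / the MASTER variable.
    val sc = new SparkContext(new SparkConf().setAppName("master_info"))
    println(s"Connected to Spark master: ${sc.master}")
    sc.stop()
  }
}
</code>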
| + | |||
| + | ==== Compilation of Scala Spark Programs ==== | ||
| + | |||
If you do not know how to compile Scala programs, you can use the following directions:
  - create a directory for your project
  - copy the template ''build.sbt'' (see the usage example below) into the project directory
  - replace the ''name'' in ''build.sbt'' with the name of your application
  - run ''sbt package''
The resulting JAR can be found in the ''target'' subdirectory of your project.

==== Usage Examples ====
Consider the following simple word-count application ''word_count.scala'':
<file scala>
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object Main {
  def main(args: Array[String]) {
    if (args.length < 2) sys.error("Usage: input output")
    val (input, output) = (args(0), args(1))

    val sc = new SparkContext()
    sc.textFile(input, 3 * sc.defaultParallelism)   // read the input with a reasonable number of partitions
      .flatMap(_.split("\\s+"))                     // split lines into words
      .map((_, 1)).reduceByKey(_ + _)               // count occurrences of every word
      .sortBy(_._2, ascending = false)              // sort by decreasing count
      .saveAsTextFile(output)
    sc.stop()
  }
}
</file>
| + | |||
| + | The '' | ||
| + | < | ||
| + | name := " | ||
| + | |||
| + | version := " | ||
| + | |||
| + | scalaVersion := " | ||
| + | |||
| + | libraryDependencies += " | ||
| + | </ | ||
| + | |||
| + | * compile the application | ||
| + | < | ||
| + | |||
| + | * run '' | ||
| + | < | ||
| + | * run '' | ||
| + | < | ||
| + | * start Spark cluster (10 machines, 2GB RAM each) on Slurm and run '' | ||
| + | < | ||
