Apache Spark is a lightning-fast cluster-computing framework written in Scala, and it exposes a Scala API. Deeper integration with Spark is on the agenda.
In [ ]:
%classpath add mvn org.apache.spark spark-sql_2.11 2.2.1
// The magic line above adds the Maven artifact org.apache.spark:spark-sql_2.11:2.2.1
// to this notebook's classpath.
import org.apache.log4j.{Level, Logger}

// Raise the root log4j logger to ERROR; presumably this keeps lower-severity
// Spark log output out of the notebook cells.
Logger.getRootLogger.setLevel(Level.ERROR)
In [ ]:
import org.apache.spark.sql.SparkSession
// Build (or reuse) the SparkSession used by the cells below.
val spark = {
  val sessionBuilder = SparkSession.builder()
    .appName("Simple Application")
    .master("local[4]")                  // master URL "local[4]": run locally with 4 worker threads
    .config("spark.ui.enabled", "false") // turn the Spark web UI off
  // getOrCreate() hands back an already-existing session if there is one.
  sessionBuilder.getOrCreate()
}
In [ ]:
// Monte Carlo estimate of Pi: draw NUM_SAMPLES random points in the unit
// square and count how many land inside the quarter circle of radius 1.
// The fraction of hits approximates Pi / 4.
val NUM_SAMPLES = 10000000

val count = spark.sparkContext
  .parallelize(1 to NUM_SAMPLES)
  .map { _ =>
    // The sample index itself is unused; each element just triggers one draw.
    val x = Math.random()
    val y = Math.random()
    val insideQuarterCircle = x * x + y * y < 1
    if (insideQuarterCircle) 1 else 0
  }
  .reduce((left, right) => left + right)

// Multiplying by 4.0 first keeps the division in floating point.
println("Pi is roughly " + (4.0 * count / NUM_SAMPLES))
In [ ]:
// Stop the `spark` session once the computation is finished.
spark.stop()
In [ ]: