In [1]:
from pyspark import (SparkContext, SparkConf)
In [2]:
# http://spark.apache.org/docs/1.2.0/configuration.html
conf = SparkConf()
# https://spark.apache.org/faq.html
# local[N] = run locally with N worker threads; local[*] = one thread per core
conf.setMaster("local[10]").setAppName("Simple App")
#conf.set("spark.cores.max", "10")
sc = SparkContext(conf=conf)
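Worth sanity-checking what the context actually picked up. SparkConf.getAll() returns the configured key/value pairs -- a quick sketch:
In [ ]:
# Dump the settings configured on this SparkConf
for key, value in conf.getAll():
    print("%s=%s" % (key, value))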
In [3]:
#sc = SparkContext(master="local", appName="Simple App")
r = sc.parallelize(range(10000))  # distribute the integers 0..9999 as an RDD
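Without a numSlices argument, parallelize() falls back to the default parallelism; getNumPartitions() (available since PySpark 1.1) shows what we got. A sketch:
In [ ]:
# How many partitions did the range get split into?
r.getNumPartitions()
# Or request one partition per local thread explicitly
r10 = sc.parallelize(range(10000), 10)
r10.getNumPartitions()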
In [4]:
from math import factorial, log10
# Sum n! for n = 0..9999 exactly, then take log10 of the huge result
fact_sum = r.map(factorial).sum()
log10(fact_sum)
Out[4]:
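fact_sum is exact -- Python integers have arbitrary precision -- but the job ships some enormous numbers around just to end up with one float. If only the order of magnitude matters, a cheaper sketch uses log10(n!) = lgamma(n+1)/ln(10) per element; since 9999! dominates the sum, the max is a good approximation of log10(fact_sum):
In [ ]:
from math import lgamma, log
# log10(n!) = lgamma(n + 1) / ln(10) -- no big integers involved
log10_fact = r.map(lambda n: lgamma(n + 1) / log(10))
# 9999! dominates the sum, so this approximates log10(fact_sum)
log10_fact.max()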
In [5]:
!ls /spark/sbin
In [6]:
!ls ./sbin/start-master.sh
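Those sbin scripts drive Spark's standalone cluster mode. A minimal sketch, assuming Spark lives under /spark as the first listing suggests:
In [ ]:
# Start a standalone master; its web UI (default http://localhost:8080)
# shows the spark://... URL that workers and setMaster() connect to.
!/spark/sbin/start-master.sh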
I will move to Mesos, because Mesos supports Docker.
http://spark.apache.org/docs/1.2.0/running-on-mesos.html
Maybe try https://elastic.mesosphere.io/ -- but is there a script for this?
http://mesos.apache.org/documentation/latest/ec2-scripts/
https://digitalocean.mesosphere.com/clusters/new
17 steps in https://mesosphere.com/docs/tutorials/run-spark-on-mesos/ -- seriously?
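Per the running-on-mesos page above, the client side stays small: point setMaster at the Mesos master and tell executors where to fetch a Spark build. A sketch with placeholder host and tarball URL:
In [ ]:
# Sketch only -- master host and executor tarball URL are placeholders.
# Stop the existing local context first: sc.stop()
conf = (SparkConf()
        .setMaster("mesos://mesos-master.example.com:5050")
        .setAppName("Simple App")
        # Each Mesos slave fetches this Spark build for its executors
        .set("spark.executor.uri",
             "http://example.com/spark-1.2.0-bin-hadoop2.4.tgz"))
sc = SparkContext(conf=conf)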
In [ ]: