We run YARN following the instructions at https://hadoop.apache.org/docs/r3.1.2/hadoop-project-dist/hadoop-common/SingleCluster.html
Additionally, we have to add one of the following to etc/hadoop/yarn-site.xml so that YARN does not kill our containers for exceeding the default virtual-memory limit. Either raise the virtual-to-physical memory ratio:
<property>
  <name>yarn.nodemanager.vmem-pmem-ratio</name>
  <value>5</value>
</property>
or disable the physical- and virtual-memory checks entirely:
<property>
  <name>yarn.nodemanager.pmem-check-enabled</name>
  <value>false</value>
</property>
<property>
  <name>yarn.nodemanager.vmem-check-enabled</name>
  <value>false</value>
</property>
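For reference, a minimal yarn-site.xml combining the second option with the aux-services property from the linked single-cluster guide might look as follows (a sketch; your file may contain additional properties):
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
</configuration>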
In [ ]:
import os
# PATH_TO_HADOOP_CONF_DIR is a placeholder: point it at the etc/hadoop
# directory of the installation configured above
os.environ["HADOOP_CONF_DIR"] = PATH_TO_HADOOP_CONF_DIR
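An optional sanity check, not part of the original notebook, that the configured directory actually contains the YARN client configuration:
In [ ]:
import os
conf_dir = os.environ["HADOOP_CONF_DIR"]
# yarn-site.xml must be readable by the Spark client for YARN mode to work
print(os.path.exists(os.path.join(conf_dir, "yarn-site.xml")))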
In [ ]:
%%spark --yarn
from pyspark.sql import SparkSession
# The %%spark magic picks up this builder and starts the session on YARN
SparkSession.builder \
    .appName("SparkYarnBeakerxSupport3")
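If you are not using the BeakerX %%spark magic, a roughly equivalent session can be created directly with PySpark (a minimal sketch, assuming pyspark is installed and HADOOP_CONF_DIR is set as above):
In [ ]:
from pyspark.sql import SparkSession

# Build the session against the YARN resource manager explicitly
spark = SparkSession.builder \
    .master("yarn") \
    .appName("SparkYarnBeakerxSupport3") \
    .getOrCreate()
sc = spark.sparkContext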
In [ ]:
import random

def inside(p):
    # Sample a random point in the unit square; p (the element index) is unused
    x, y = random.random(), random.random()
    return x*x + y*y < 1

NUM_SAMPLES = 100000000
# Monte Carlo estimate: the fraction of sampled points that land inside the
# quarter circle approaches pi/4
count = sc.parallelize(range(0, NUM_SAMPLES)).filter(inside).count()
print("Pi is roughly %f" % (4.0 * count / NUM_SAMPLES))
In [ ]:
spark.stop()  # shut down the Spark session (also stops the underlying SparkContext)
In [ ]:
sc.stop()  # idempotent; a no-op if the context was already stopped above