In [7]:
# You may need to Reconnect (not just Restart) the kernel to pick up changes to these values
import os

# Spark master URL, resource limits, and the Cassandra contact point
master = '--master spark://spark-master-2-1-0:7077'
conf = '--conf spark.cores.max=2 --conf spark.executor.memory=2g --conf spark.cassandra.connection.host=cassandra'

# Maven coordinates for the S3 (hadoop-aws), Kafka, Cassandra, Avro, and XML connectors
packages = '--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1,org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.1,com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3,com.databricks:spark-avro_2.11:3.0.1,com.databricks:spark-xml_2.11:0.4.1'

# Local JARs and Python modules to ship with the application
jars = '--jars /root/lib/jpmml-sparkml-package-1.0-SNAPSHOT.jar'
py_files = '--py-files /root/lib/jpmml.py'

os.environ['PYSPARK_SUBMIT_ARGS'] = master \
  + ' ' + conf \
  + ' ' + packages \
  + ' ' + jars \
  + ' ' + py_files \
  + ' ' + 'pyspark-shell'

print(os.environ['PYSPARK_SUBMIT_ARGS'])


--master spark://spark-master-2-1-0:7077 --conf spark.cores.max=2 --conf spark.executor.memory=2g --conf spark.cassandra.connection.host=cassandra --packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.1,org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.1,com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3,com.databricks:spark-avro_2.11:3.0.1,com.databricks:spark-xml_2.11:0.4.1 --jars /root/lib/jpmml-sparkml-package-1.0-SNAPSHOT.jar --py-files /root/lib/jpmml.py pyspark-shell
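
Note: PYSPARK_SUBMIT_ARGS is consumed when PySpark launches its JVM gateway, i.e. when the first SparkSession or SparkContext is created in the kernel. Changes made to it after that point have no effect until the gateway is relaunched, which is why a kernel reconnect is needed to pick up new values.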

In [8]:
# Insert your PySpark code here (see the sketch after the SparkSession cell below)...

In [11]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

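With the session created, it is worth confirming that the submit args above took effect, and trying a read through the Cassandra connector loaded via --packages. This is a minimal sketch: the keyspace and table names (my_keyspace, my_table) are hypothetical placeholders, not from the original notebook.

In [ ]:
# Verify the session picked up the submit args configured above
print(spark.version)
print(spark.conf.get('spark.cores.max'))

# Read a table through the spark-cassandra-connector; the keyspace and
# table names below are hypothetical placeholders -- substitute your own
df = spark.read \
    .format('org.apache.spark.sql.cassandra') \
    .options(keyspace='my_keyspace', table='my_table') \
    .load()
df.show(5)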

In [12]:
# ...

In [ ]: