In [1]:
import spark_jupyter
In [2]:
from pyspark import SparkConf, SparkContext, SQLContext

# Only one SparkContext may be active at a time, so constructing one directly
# makes this cell fail if it is re-run without restarting the kernel.
# getOrCreate() returns the already-running context when there is one and
# otherwise starts a new one with the same master / app name as before.
sc = SparkContext.getOrCreate(
    SparkConf().setMaster("local[*]").setAppName("Simple App")
)

# SQL entry point bound to the context above; later cells build DataFrames with it.
hc = SQLContext(sc)
In [3]:
# Build a small two-row, three-column demo DataFrame from in-memory Python lists.
df = hc.createDataFrame(
    [
        ['a', 'b', 'c'],
        ['e', 'f', 'g'],
    ],
    # Only column names are supplied here; no explicit column types are given.
    schema=['A', 'B', 'C'],
)

# Display the contents to confirm the frame was built as expected.
df.show()
Out[3]:
In [4]:
# Print the DataFrame's schema so the columns (A, B, C) and the types Spark
# assigned to them can be inspected.
df.printSchema()
Out[4]: