Python 3

Version


In [1]:
# Print the version string of the Python interpreter backing this kernel.
import sys
print(sys.version)


3.6.9 (default, Nov  7 2019, 10:44:02) 
[GCC 8.3.0]

Spark


In [2]:
import os
from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [3]:
# Read the Spark installation directory from the environment.
# `spark_home` is None when SPARK_HOME is not set.
spark_home = os.environ.get("SPARK_HOME")
print(spark_home)


/usr/local/spark-2.4.5

In [4]:
# Build (or reuse) a local Spark session that uses all available cores.
# The memory-related options are all set to 8g through the builder.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("spark")
    .config("spark.driver.memory", "8g")
    .config("spark.executor.memory", "8g")
    .config("spark.python.worker.memory", "8g")
    .getOrCreate()
)

In [5]:
!cat /usr/local/spark/examples/src/main/resources/people.json


{"name":"Michael"}
{"name":"Andy", "age":30}
{"name":"Justin", "age":19}

In [6]:
# Resolve the bundled example file under the Spark installation directory,
# load it as a DataFrame, and print its rows.
people_json = "{}/examples/src/main/resources/people.json".format(spark_home)
df = spark.read.json(people_json)
df.show()


+----+-------+
| age|   name|
+----+-------+
|null|Michael|
|  30|   Andy|
|  19| Justin|
+----+-------+


In [7]:
# Stop the Spark session now that the example is finished.
spark.stop()

In [ ]: