In [1]:
# Print the version string of the Python interpreter backing this kernel.
import sys
print(sys.version)
In [2]:
import os
from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
In [3]:
# Look up the Spark installation directory from the environment
# (None when SPARK_HOME is not set) and echo it for reference.
spark_home = os.environ.get("SPARK_HOME")
print(spark_home)
In [4]:
# Build (or reuse) a SparkSession with master "local[*]" and app name "spark",
# configuring 8g for the driver, executor and Python worker memory settings.
# A single fluent chain keeps `spark` from ever being bound to the
# intermediate Builder object.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("spark")
    .config("spark.driver.memory", "8g")
    .config("spark.executor.memory", "8g")
    .config("spark.python.worker.memory", "8g")
    .getOrCreate()
)
In [5]:
!cat /usr/local/spark/examples/src/main/resources/people.json
In [6]:
# Resolve the bundled people.json sample under the Spark installation,
# load it into a DataFrame, and display its rows.
people_json_path = "{}/examples/src/main/resources/people.json".format(spark_home)
df = spark.read.json(people_json_path)
df.show()
In [7]:
# Stop the SparkSession created earlier in this notebook.
spark.stop()
In [ ]: