In [ ]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.3.1
org.apache.spark spark-sql_2.11 2.3.1
org.apache.spark spark-hive_2.11 2.3.1

In [ ]:
import org.apache.spark.sql.SparkSession
// Obtain the SparkSession used by every later cell: named "parquet example"
// and pointed at the "local" master.
val spark = SparkSession
  .builder()
  .appName("parquet example")
  .master("local")
  .getOrCreate()

// Leave the session as the cell's final expression so the notebook displays it.
spark

In [ ]:
// Read the sample people records from the JSON file into a DataFrame.
val peopleDF = spark
  .read
  .json("../resources/data/people.json")

In [ ]:
import spark.implicits._
// Persist the DataFrame in Parquet format. The save mode is set explicitly to
// "overwrite" so this cell can be re-run: the writer's default mode
// (ErrorIfExists) throws as soon as "people.parquet" already exists.
peopleDF.write.mode("overwrite").parquet("people.parquet")

In [ ]:
import spark.implicits._
// Load the Parquet data back into a DataFrame.
val parquetFileDF = spark.read.parquet("people.parquet")

// Register the DataFrame under a view name so it can be queried with SQL.
parquetFileDF.createOrReplaceTempView("parquetFile")

// Select the names of people whose age is between 13 and 19 (inclusive).
val namesDF = spark.sql("SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19")

// Prefix each selected name (column 0 of the row) and print the result.
// The map needs the String encoder brought in by `spark.implicits._`.
namesDF
  .map(row => s"Name: ${row(0)}")
  .show()

In [ ]:
// Count the rows of the DataFrame; written with parentheses because `count`
// is an action that runs a Spark job rather than a plain accessor.
peopleDF.count()

In [ ]: