In [ ]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.3.1
org.apache.spark spark-sql_2.11 2.3.1
org.apache.spark spark-hive_2.11 2.3.1
In [ ]:
import org.apache.spark.sql.SparkSession

// Obtain the notebook's SparkSession (an existing one is reused if present),
// running against the "local" master under the application name below.
val spark: SparkSession = SparkSession
  .builder()
  .appName("parquet example")
  .master("local")
  .getOrCreate()

// Trailing expression: the session itself is the value of this cell.
spark
In [ ]:
// Read the sample people records from JSON into a DataFrame.
val peopleDF = spark.read
  .format("json")
  .load("../resources/data/people.json")
In [ ]:
import spark.implicits._

// Persist the people DataFrame as Parquet at the relative path "people.parquet".
// The writer's default save mode fails when the target already exists, so
// re-running this cell would throw; overwrite mode keeps the cell re-runnable
// by replacing the output of the previous run.
peopleDF.write.mode("overwrite").parquet("people.parquet")
In [ ]:
// Brings the implicit encoders into scope; `map` below needs one for String.
import spark.implicits._

// Load the Parquet data back and register it for SQL under the name "parquetFile".
val parquetFileDF = spark.read.parquet("people.parquet")
parquetFileDF.createOrReplaceTempView("parquetFile")

// Names of the people whose age lies between 13 and 19.
val namesDF = spark.sql("SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19")

// Prefix the single selected column (index 0, the name) and print the result.
namesDF.map(row => s"Name: ${row(0)}").show()
In [ ]:
// Number of rows in the DataFrame read from the JSON file.
peopleDF.count()
In [ ]: