In [1]:
// import $ivy.`net.tixxit::framian:0.5.0`
// import framian.csv.Csv
// import java.io.File
// case class Country(name: String, code: String, code2: String, numeric: Int)
// Csv.parseFile(new File("../../data/countries.csv"))
import ammonite.ops._
// Add the current working directory (`pwd`, from ammonite.ops) to the
// interpreter's classpath. NOTE(review): presumably so locally built classes
// (cf. the commented-out `jt.domain.Country` import) can be resolved — confirm.
interp.load.cp(pwd)
// import jt.domain.Country
Out[1]:
In [2]:
import $exec.`init_spark`
import sparkSession.implicits._
Out[2]:
In [3]:
// Load the NY.GDP.MKTP.CD dataset. The Parquet format is named explicitly so the
// read does not depend on the session's `spark.sql.sources.default` setting,
// which the format-less `load(path)` falls back to.
val df = sparkSession.read.parquet("../../data/NY.GDP.MKTP.CD.parquet")
Out[3]:
In [4]:
// Keep only rows whose `country` equals "USA" and whose `year` is strictly
// greater than 2000, ordered by ascending `year`.
val x = df
  .where($"country" === "USA")
  .where($"year" > 2000)
  .orderBy($"year")
Out[4]:
In [14]:
// Project country and year, plus the NY_GDP_MKTP_CD value divided by 1e12
// (i.e. expressed in trillions of the source unit) under the column name "GDP".
val y = {
  val gdp = ($"NY_GDP_MKTP_CD" / 1e12).alias("GDP")
  x.select($"country", $"year", gdp)
}
Out[14]:
In [21]:
// Print the per-country mean of the numeric columns of `y`.
// NOTE(review): `avg()` without column names averages every numeric column,
// so if `year` is numeric its mean is reported too — confirm that is intended,
// or restrict the aggregation to "GDP".
y.groupBy("country")
  .avg()
  .show()
In [ ]: