In [1]:
// Earlier CSV-based attempt using Framian, left commented out:
// import $ivy.`net.tixxit::framian:0.5.0`
// import framian.csv.Csv
// import java.io.File
// case class Country(name: String, code: String, code2: String, numeric: Int)
// Csv.parseFile(new File("../../data/countries.csv"))

// Put the current working directory on the interpreter classpath so local classes/scripts resolve
import ammonite.ops._
interp.load.cp(pwd)

// import jt.domain.Country


Out[1]:
import ammonite.ops._

In [2]:
import $exec.`init_spark`
import sparkSession.implicits._


Compiling init_spark.sc
log4j:WARN No appenders could be found for logger (io.netty.util.internal.logging.InternalLoggerFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Out[2]:
import $exec.$           

import sparkSession.implicits._

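The contents of init_spark.sc are not shown in this notebook. As a rough, hypothetical sketch, assuming a local Spark 2.x resolved through Ammonite's $ivy imports (the version and app name below are assumptions), the script presumably defines the sparkSession value used in the following cells:

// init_spark.sc (hypothetical sketch; Spark version and app name are assumptions)
import $ivy.`org.apache.spark::spark-sql:2.4.8`
import org.apache.spark.sql.SparkSession

val sparkSession = SparkSession.builder()
  .master("local[*]")
  .appName("world-bank-gdp")
  .getOrCreate()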
In [3]:
val df = sparkSession.read.load("../../data/NY.GDP.MKTP.CD.parquet")


SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
Out[3]:
df: DataFrame = [country: string, year: bigint ... 1 more field]

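The schema summary truncates the third column. Printing the full schema shows it; from the select a few cells below, that column is NY_GDP_MKTP_CD, the World Bank indicator code for GDP in current US dollars:

// Show all columns of the loaded parquet file, including the one truncated above
df.printSchema()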
In [4]:
val x = df.sort($"year").filter($"country" === "USA" && $"year" > 2000)


Out[4]:
x: Dataset[Row] = [country: string, year: bigint ... 1 more field]

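A small note on the ordering above: filtering before sorting states the intent more directly, although Catalyst will typically push the predicate down either way. An equivalent formulation (x2 is just an illustrative name):

val x2 = df.filter($"country" === "USA" && $"year" > 2000).sort($"year")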
In [14]:
val y = x.select($"country", $"year", $"NY_GDP_MKTP_CD" / 1e12 alias "GDP")


Out[14]:
y: DataFrame = [country: string, year: bigint ... 1 more field]

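Dividing NY_GDP_MKTP_CD (current US$) by 1e12 expresses the values in trillions of dollars, so a more self-describing alias could be used; a purely illustrative variant:

val y2 = x.select($"country", $"year", ($"NY_GDP_MKTP_CD" / 1e12).alias("gdp_trillion_usd"))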
In [21]:
y.groupBy($"country").avg().show()


+-------+---------+-------------+
|country|avg(year)|     avg(GDP)|
+-------+---------+-------------+
|    USA|   2008.5|14.5798391875|
+-------+---------+-------------+


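The bare avg() also averages the year column (hence the 2008.5), which is not a meaningful quantity here. To aggregate only the GDP column, an explicit agg works; a sketch assuming the import shown:

import org.apache.spark.sql.functions.avg

y.groupBy($"country").agg(avg($"GDP").alias("avg_gdp_trillion_usd")).show()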
In [ ]: