notebook.community

Edit and run



In [1]:

    
// import $ivy.`net.tixxit::framian:0.5.0`
// import framian.csv.Csv
// import java.io.File
// case class Country(name: String, code: String, code2: String, numeric: Int)
// Csv.parseFile(new File("../../data/countries.csv"))
import ammonite.ops._
// Add the current working directory (`pwd`, from ammonite.ops) to the
// interpreter's classpath.
// NOTE(review): presumably so that locally built classes (e.g. the
// commented-out jt.domain.Country import below) can be resolved — confirm.
interp.load.cp(pwd)

// import jt.domain.Country









    Out[1]:





import ammonite.ops._



In [2]:

    
// Ammonite `$exec` magic import: compiles and runs the init_spark script in
// this session so that its definitions become visible to later cells.
// NOTE(review): `sparkSession` is not defined anywhere in this notebook; it is
// presumably created by init_spark — confirm.
import $exec.`init_spark`
// Brings Spark's implicit conversions into scope, including the $"column"
// syntax used by the cells below.
import sparkSession.implicits._









    



Compiling init_spark.sc






    



log4j:WARN No appenders could be found for logger (io.netty.util.internal.logging.InternalLoggerFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.






    Out[2]:





import $exec.$           

import sparkSession.implicits._



In [3]:

    
// Load the GDP data set from its Parquet file.
// The format is named explicitly instead of calling the generic `load`, which
// falls back to whatever `spark.sql.sources.default` is set to in the session.
val df = sparkSession.read.parquet("../../data/NY.GDP.MKTP.CD.parquet")









    



SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.






    Out[3]:





df: DataFrame = [country: string, year: bigint ... 1 more field]



In [4]:

    
// Rows for the USA from years after 2000, ordered by year.
// The filter is applied first and the sort last, so the ordering is expressed
// on the already-reduced rows rather than on the whole data set (no reliance
// on the optimizer reordering the two steps).
val x = df
  .filter($"country" === "USA" && $"year" > 2000)
  .sort($"year")









    Out[4]:





x: Dataset[Row] = [country: string, year: bigint ... 1 more field]



In [14]:

    
// Project country and year, plus the NY_GDP_MKTP_CD value divided by 1e12
// and exposed under the column name "GDP".
val y = x.select(
  $"country",
  $"year",
  ($"NY_GDP_MKTP_CD" / 1e12).alias("GDP")
)









    Out[14]:





y: DataFrame = [country: string, year: bigint ... 1 more field]



In [21]:

    
// Print the per-country averages.
// NOTE(review): `avg()` without column names averages every numeric column,
// which is why the printed table contains avg(year) next to avg(GDP); pass
// "GDP" to `avg` if only the GDP mean is wanted.
y.groupBy("country")
  .avg()
  .show()









    



                                                                                






    



+-------+---------+-------------+
|country|avg(year)|     avg(GDP)|
+-------+---------+-------------+
|    USA|   2008.5|14.5798391875|
+-------+---------+-------------+



In [ ]: