In [2]:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
//Data is not aggregated
val co2 = spark.read
  .format("csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .load("Co2all.csv")

// Add a column that has Year as a true date type.
// BUG FIX: the original udf concatenated the year twice
// ("1990-01-01 1990 00:00 UTC"), which is not a valid literal for
// to_date's default yyyy-MM-dd parsing. A plain "<year>-01-01" string
// is all to_date needs to yield January 1st of that year.
val convertDate = udf { (year: Int) => s"$year-01-01" }

val co2dates = co2.withColumn("year_as_date", to_date(convertDate(co2("Year"))))

// Data is aggregated by Spark: mean of "value" per "CO2 per capita" group.
val co2agg = co2.groupBy("CO2 per capita").agg(avg("value") as "Mean Co2")
Out[2]:
In [3]:
%%brunel data('co2agg') map(low) x(CO2_per_capita) color(Mean_Co2) tooltip(#all):: width=800, height=500
Out[3]:
In [4]:
%%brunel data('co2dates')
map(low) bar x(CO2_per_capita) color(value) mean(value) interaction(select) at(0,0,100,50) tooltip(#all) |
x(year_as_date) y(value) line label(CO2_per_capita) interaction(filter) at(0,50, 100,100)
:: width=900, height=600
Out[4]:
In [ ]: