In [1]:
# @hidden_cell
# This function is used to set up Spark's access to your Object Storage. The definition contains your credentials.
# You might want to remove those credentials before you share your notebook.
setHadoopConfigWithCredentials_d3bd5b94a9334de59a55a7fed2bedeaa <- function(name) {
# This function sets the Hadoop configuration so it is possible to
# access data from Bluemix Object Storage using Spark
    prefix <- paste("fs.swift.service", name, sep = ".")
    hConf <- SparkR:::callJMethod(sc, "hadoopConfiguration")
SparkR:::callJMethod(hConf, "set", paste(prefix, "auth.url", sep='.'), paste("https://identity.open.softlayer.com","/v3/auth/tokens",sep=""))
SparkR:::callJMethod(hConf, "set", paste(prefix, "auth.endpoint.prefix", sep='.'), "endpoints")
SparkR:::callJMethod(hConf, "set", paste(prefix, "tenant", sep='.'), "6aaf54352357483486ee2d4981f8ef15")
SparkR:::callJMethod(hConf, "set", paste(prefix, "username", sep='.'), "c0eebedc019f4413be3f3d656821b35f")
SparkR:::callJMethod(hConf, "set", paste(prefix, "password", sep='.'), "ji[T[l.(7D&gld*5")
SparkR:::callJMethod(hConf, "set", paste(prefix, "region", sep='.'), "dallas")
invisible(SparkR:::callJMethod(hConf, "setBoolean", paste(prefix, "public", sep='.'), FALSE))
}
name <- "keystone"
setHadoopConfigWithCredentials_d3bd5b94a9334de59a55a7fed2bedeaa(name)
invisible(sparkR.session(appName = "test SparkSession R"))
In [2]:
df.data.1 <- read.json(paste("swift://", "coursera", ".", name, "/",
                             "bearing1_1_acc_transformed_youtube.json", sep = ""))
head(df.data.1)
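Since Spark infers the schema from the JSON file, it is worth confirming that ts, hacc, vacc (and cluster, used below) came back with the expected types; printSchema does this directly:

# Inspect the schema Spark inferred from the JSON file
printSchema(df.data.1)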
In [3]:
n <- nrow(df.data.1)
n
In [4]:
createOrReplaceTempView(df.data.1, "data")
# Take an approximate 10% random sample, ordered by timestamp
df_sample <- sql("select * from data where rand() <= 0.1 order by ts asc")
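The rand() predicate keeps each row with probability 0.1, so the sample size is only approximately 10% of n. A sketch of the same idea with SparkR's sample() API instead of SQL (df_sample_api and the seed are illustrative):

# Approximate 10% sample without replacement; fixed seed for repeatability
df_sample_api <- sample(df.data.1, withReplacement = FALSE, fraction = 0.1, seed = 42)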
In [5]:
n <- nrow(df_sample)
n
In [6]:
# Pull the sample to the driver as a local R data.frame
df_sample_rdf <- collect(df_sample)
In [7]:
colnames(df_sample_rdf)
In [8]:
with(df_sample_rdf, plot(ts, hacc, type = "o", col = "blue"))
In [9]:
with(df_sample_rdf, plot(ts, vacc, type = "o", col = "blue"))
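To see both acceleration channels on a shared time axis, they can be overlaid in one plot; a minimal base-R sketch (colors and legend placement are arbitrary):

# Overlay horizontal and vertical acceleration against the timestamp
with(df_sample_rdf, {
    plot(ts, hacc, type = "l", col = "blue",
         ylim = range(c(hacc, vacc)), xlab = "ts", ylab = "acceleration")
    lines(ts, vacc, col = "red")
    legend("topright", legend = c("hacc", "vacc"),
           col = c("blue", "red"), lty = 1)
})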
In [10]:
df_grouped <- sql("
select cluster,
       mean(hacc) as mhacc,
       mean(vacc) as mvacc,
       stddev_pop(hacc) as sdhacc,
       stddev_pop(vacc) as sdvacc
from data
group by cluster
order by cluster asc")
In [11]:
df_grouped_local <- collect(df_grouped)
In [12]:
df_grouped_local
In [13]:
nrow(df_grouped_local)
In [14]:
with(df_grouped_local, plot(cluster, sdhacc))
In [15]:
with(df_grouped_local, plot(cluster, sdvacc))
In [16]:
with(df_grouped_local, plot(cluster, mhacc))
In [17]:
with(df_grouped_local, plot(cluster, mvacc))
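For easier side-by-side comparison, the four per-cluster plots can go into one 2x2 grid; a minimal base-R sketch:

# Draw mean and standard deviation of both channels in a 2x2 grid
op <- par(mfrow = c(2, 2))
with(df_grouped_local, {
    plot(cluster, sdhacc, main = "sd(hacc) per cluster")
    plot(cluster, sdvacc, main = "sd(vacc) per cluster")
    plot(cluster, mhacc, main = "mean(hacc) per cluster")
    plot(cluster, mvacc, main = "mean(vacc) per cluster")
})
par(op)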
In [18]:
install.packages("wavelets")
In [19]:
library(wavelets)
In [20]:
# Discrete wavelet transform of the vertical acceleration (Haar filter, periodic boundary)
wt <- with(df_sample_rdf, dwt(vacc, filter = "haar", boundary = "periodic"))
In [21]:
# First few entries of the detail coefficients plus the coarsest scaling coefficients
head(unlist(c(wt@W, wt@V[[wt@level]])))
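In a dwt object, the W slot holds the detail coefficients per decomposition level and V the scaling (approximation) coefficients, so the line above flattens them into one numeric feature vector. The wavelets package also ships a plot method for dwt objects, which gives a quick visual check of the decomposition; a minimal sketch:

# Plot the detail and scaling coefficients of the Haar decomposition
plot(wt)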