In [1]:
Sys.setenv(SPARK_HOME='/usr/lib/spark')
.libPaths(c(file.path(Sys.getenv('SPARK_HOME'), 'R', 'lib'), .libPaths()))

In [2]:
library(SparkR)


Attaching package: ‘SparkR’

The following objects are masked from ‘package:stats’:

    cov, filter, lag, na.omit, predict, sd, var, window

The following objects are masked from ‘package:base’:

    as.data.frame, colnames, colnames<-, drop, endsWith, intersect,
    rank, rbind, sample, startsWith, subset, summary, transform, union


In [3]:
appName <- 'r_spark_context'
masterURL <- 'spark://pheno0.phenovari-utwente.surf-hosted.nl:7077'

sc <- sparkR.session(master=masterURL, appName=)


Spark package found in SPARK_HOME: /usr/lib/spark
Launching java with spark-submit command /usr/lib/spark/bin/spark-submit   sparkr-shell /tmp/RtmpLfcZC8/backend_port2c601e8c51e4 

In [13]:
offline_dir_path <- "hdfs:///user/pheno/spring-index/"
geoTiff_dir <- "BloomFinal"
wssse_csv_path <- paste(offline_dir_path, geoTiff_dir, "/wssse.csv", sep="")

In [32]:
columns <- c("num_clusters", "num_iterations", "wssse")

data <- read.df(wssse_csv_path, source="csv") #, header=FALSE) #, col.names = columns, stringsAsFactors = FALSE)


Error: Error in loadDF : No status is returned. Java SparkR backend might have failed.

Traceback:

1. read.df(wssse_csv_path, source = "csv", header = FALSE)
2. dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", 
 .     x, ...)
3. f(x, ...)
4. handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", 
 .     sparkSession, source, options)
5. tryCatch(callJStatic(cls, method, ...), error = function(e) {
 .     captureJVMException(e, method)
 . })
6. tryCatchList(expr, classes, parentenv, handlers)
7. tryCatchOne(expr, names, parentenv, handlers[[1L]])
8. value[[3L]](cond)
9. captureJVMException(e, method)
10. stop(stacktrace, call. = FALSE)

In [30]:
print(data[0,1])


Error in data[0, 1]: Expressions other than filtering predicates are not supported in the first parameter of extract operator [ or subset() method.
Traceback:

1. print(data[0, 1])
2. data[0, 1]
3. data[0, 1]
4. stop(paste0("Expressions other than filtering predicates are not supported ", 
 .     "in the first parameter of extract operator [ or subset() method."))

In [23]:
plot(data, x='_c0', y='_c2')


Error in factor(input$TESTCELL_CONTENT): object 'input' not found
Traceback:

1. factor(input$TESTCELL_CONTENT)

In [ ]: