In [1]:
# import packages
import h2o, os.path
import sparkling_water as sw
from pyspark import SparkConf, SparkContext
In [2]:
# Create the Spark context: local master, app name "My app",
# spark.executor.memory set to "512mb".
# getOrCreate() returns the already-running context if there is one, so
# re-running this cell does not fail with
# "Cannot run multiple SparkContexts at once".
# Note: when a context already exists it is returned as-is and `conf`
# is not applied to it.
conf = (
    SparkConf()
    .setMaster("local")
    .setAppName("My app")
    .set("spark.executor.memory", "512mb")
)
sc = SparkContext.getOrCreate(conf=conf)
In [3]:
# Attach this session to an H2O cluster that is already running
# at the address below.
ip, port = "192.168.0.14", 54323
h2o.init(
    ip=ip,
    port=port,
)
In [4]:
# Build a small local list (the integers 1 through 5) and distribute it
# as an RDD through the Spark context.
data = list(range(1, 6))
distData = sc.parallelize(
    data
)
In [5]:
# Upload the RDD to H2O as a frame.
# NOTE(review): presumably this returns an H2O frame handle bound to the
# cluster connected above — confirm against the sparkling_water version in use.
h2o_frame = sw.Utils.upload_frame_from_rdd(distData)
In [6]:
# Show the output of the uploaded frame's describe() method.
h2o_frame.describe()
In [7]:
# Download the frame as CSV to "<home>/downloaded_dataset".
# os.path.join inserts the separator only when needed, so a home directory
# that already ends with one (e.g. "/") does not produce a doubled separator
# as the manual `home + os.path.sep + name` concatenation did.
home = os.path.expanduser("~")
filename = os.path.join(home, "downloaded_dataset")
h2o.download_csv(h2o_frame, filename)
In [ ]: