This is a sample machine learning demo created with Alink from Alibaba. Cleuton Sampaio, Data Learning Hub
In [1]:
#Imports
from pyalink.alink import *
In [2]:
#Environment configuration
# Start a local Alink execution environment. The first positional argument
# is presumably the parallelism (TODO confirm against the PyAlink docs);
# flinkHome and config are left as None.
# A Flink cluster could be used instead of the local environment:
# https://flink.apache.org/poweredby.html
LOCAL_PARALLELISM = 1
useLocalEnv(LOCAL_PARALLELISM, flinkHome=None, config=None)
Out[2]:
In [3]:
#Preparing dataframe
# Read a CSV dataset containing the weights and heights of students.
# The goal of this notebook is to predict weight based on height.
URL = "./weight-height.csv"
SCHEMA_STR = "weight double,height double"

# Batch source over the CSV file; columns are named and typed by SCHEMA_STR.
students_data = (
    CsvSourceBatchOp()
    .setFilePath(URL)
    .setSchemaStr(SCHEMA_STR)
    .setFieldDelimiter(",")
)

# Split the rows with fraction 0.8: the operator's main output is used as the
# training set and its first side output as the test set.
# NOTE(review): no random seed is set here, so the split may differ between
# runs -- confirm whether SplitBatchOp offers a seed option in this version.
splitter = SplitBatchOp().setFraction(0.8)
train = splitter.linkFrom(students_data)
test = splitter.getSideOutput(0)
In [4]:
#Creating Linear Regression Model based on operator
# The stated goal of the notebook is to predict weight from height, so
# "height" is the single feature column and "weight" is the label.
# Predicted values are written to the "prediction" column.
lr = (
    LinearRegression()
    .setFeatureCols(["height"])
    .setLabelCol("weight")
    .setPredictionCol("prediction")
)
In [5]:
#Training and printing results
# Fit the linear regression on the training split only.
model = lr.fit(train)
# Score the held-out test split, so the printed predictions are for rows
# the model was not fitted on (printing predictions for `train` would only
# show in-sample results and leave the test split unused).
model.transform(test).print()
In [ ]: