In [1]:
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/evaluation"
"github.com/sjwhitworth/golearn/knn"
)
<img style="float: left;" src="iris.jpg">
Information about this dataset comes from here
This is perhaps the best known database to be found in the pattern recognition literature. Fisher's paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other.
Predicted attribute: class of iris plant.
This example model comes from the golearn documentation.
In [2]:
// Load the iris dataset from CSV into golearn instances.
// Think of instances as a Data Frame structure in R or Pandas.
// You can also create instances from scratch.
// NOTE(review): the previous comment said the file is loaded "with headers",
// but the second argument passed here is false — confirm whether iris.csv
// actually has a header row and adjust the flag if so.
rawData, err := base.ParseCSVToInstances("iris.csv", false)
if err != nil {
	// Stop here: every later step depends on the data having been loaded.
	panic(fmt.Errorf("loading iris.csv: %w", err))
}

// Initialise a new KNN classifier configured with "euclidean" and 2.
cls := knn.NewKnnClassifier("euclidean", 2)

// Do a training-test split, passing 0.50 as the split proportion.
trainData, testData := base.InstancesTrainTestSplit(rawData, 0.50)
cls.Fit(trainData)

// Calculates the Euclidean distance and returns the most popular label.
predictions := cls.Predict(testData)

// Calculate precision/recall metrics, and summarize results.
confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
if err != nil {
	// Do not print a summary built from a matrix that failed to compute.
	panic(fmt.Errorf("computing confusion matrix: %w", err))
}
fmt.Println(evaluation.GetSummary(confusionMat))
Out[2]: