In [1]:
from sklearn import datasets
In [2]:
iris = datasets.load_iris()
In [3]:
# by analogy with f(X) = y: X holds the feature rows, y the target labels
X = iris.data    # e.g. array([[5.1, 3.5, 1.4, 0.2], [4.9, 3. , 1.4, 0.2], ...])
y = iris.target  # e.g. array([0, 0, ...])
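For orientation (not required for the pipeline), a quick inspection cell like the one below confirms what X and y actually contain; the shapes and names shown are the standard iris values.
In [ ]:
# iris.data is a (150, 4) array of measurements,
# iris.target a (150,) array of class indices 0, 1, 2
print(X.shape, y.shape)        # (150, 4) (150,)
print(iris.feature_names)      # sepal/petal length and width, in cm
print(iris.target_names)       # ['setosa' 'versicolor' 'virginica']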
In [4]:
# train_test_split lives in sklearn.model_selection
# (the old sklearn.cross_validation module has been removed)
from sklearn.model_selection import train_test_split
In [5]:
# test_size=0.5 holds out half of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
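A quick sanity check, if you want one: with 150 samples and test_size=0.5, each half should contain 75 rows.
In [ ]:
# the split should be 75 train / 75 test
print(len(X_train), len(X_test))  # 75 75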
In [6]:
from sklearn import tree
In [7]:
my_classifier = tree.DecisionTreeClassifier()
In [8]:
my_classifier.fit(X_train, y_train)
Out[8]:
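Optionally, the fitted tree's rules can be printed as text with tree.export_text (available in recent scikit-learn versions); this is just a peek at what fit learned.
In [ ]:
# print the learned decision rules (recent scikit-learn versions only)
print(tree.export_text(my_classifier, feature_names=iris.feature_names))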
In [9]:
predictions = my_classifier.predict(X_test)  # e.g. array([2, 0, 2, ...])
In [10]:
from sklearn.metrics import accuracy_score
In [11]:
accuracy_score(y_test, predictions)
Out[11]:
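accuracy_score here is just the fraction of test labels predicted correctly; as a sketch, the same number can be computed by hand with a NumPy comparison.
In [ ]:
# fraction of predictions that match the true test labels
(predictions == y_test).mean()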
In [14]:
# swap in a different classifier; the fit/predict interface stays the same
from sklearn.neighbors import KNeighborsClassifier
my_classifier = KNeighborsClassifier()
In [15]:
my_classifier.fit(X_train, y_train)
predictions = my_classifier.predict(X_test)  # e.g. array([2, 0, 2, ...])
accuracy_score(y_test, predictions)
Out[15]:
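To make the nearest-neighbour idea concrete, here is a minimal, hypothetical ScrappyKNN sketch: a 1-nearest-neighbour classifier that predicts the label of the closest training point. The real KNeighborsClassifier defaults to 5 neighbours and uses efficient indexing, so this is only an illustration of the idea.
In [ ]:
from scipy.spatial import distance

class ScrappyKNN:
    """Bare-bones 1-nearest-neighbour classifier (illustrative only)."""

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        return self

    def predict(self, X_test):
        predictions = []
        for row in X_test:
            # index of the training point closest to this row (Euclidean distance)
            best = min(range(len(self.X_train)),
                       key=lambda i: distance.euclidean(row, self.X_train[i]))
            predictions.append(self.y_train[best])
        return predictions

my_classifier = ScrappyKNN().fit(X_train, y_train)
accuracy_score(y_test, my_classifier.predict(X_test))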