In [2]:
import sklearn
from sklearn.datasets import load_iris
# Load the Iris dataset; the returned object is used below like a dictionary
# (it is queried with .keys() and indexed with 'data', 'target', ...).
iris_data = load_iris()
In [3]:
# Show which fields the loaded dataset object exposes.
print(iris_data.keys())
In [5]:
# Class names that the integer target labels refer to.
print(iris_data.target_names)
In [7]:
# Names of the individual features (reused later as DataFrame column labels).
print(iris_data.feature_names)
In [8]:
# Check what kind of container holds the feature values.
print(type(iris_data.data))
In [11]:
# Dimensions of the feature array.
print(iris_data.data.shape)
In [13]:
# Peek at the first five rows of the feature array instead of dumping all of it.
print(iris_data.data[:5])
In [14]:
# Target label stored for each sample.
print(iris_data.target)
Data is denoted by capital `X`, labels denoted by lowercase `y`, following the convention $f(x) = y$ in mathematics; `X` is the input (a matrix) and `y` is the target array (a vector).

`train_test_split()` function:

- `X_train` contains 75% of the dataset, specified as the training set used to build the ML model.
- `X_test` contains 25% of the dataset, designated as the test set used to evaluate model accuracy.
- The `random_state` parameter sets a fixed seed so that the same selection is made on every run.
In [16]:
from sklearn.model_selection import train_test_split

# Split features and labels into train/test parts; random_state=0 keeps the
# selection the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris_data.data,
    iris_data.target,
    random_state=0,
)
In [22]:
# Shapes of the training features and training labels, one per line.
print(X_train.shape, y_train.shape, sep="\n")
In [23]:
# Shapes of the test features and test labels, one per line.
print(X_test.shape, y_test.shape, sep="\n")
Convert the `X_train` NumPy array into a pandas DataFrame, labelling the columns with `iris_data.feature_names`.
Then create a `scatter_matrix` from the DataFrame, colored by `y_train`.
In [33]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import mglearn
from IPython.display import display
In [35]:
# Wrap the training features in a DataFrame so every column carries its feature name.
iris_df = pd.DataFrame(data=X_train, columns=iris_data['feature_names'])

# Pairwise scatter plots of all feature columns, with points colored by training label.
grr = pd.plotting.scatter_matrix(
    iris_df,
    c=y_train,
    figsize=(15, 15),
    marker='o',
    hist_kwds={'bins': 20},
    s=60,
    alpha=0.8,
    cmap=mglearn.cm3,
)
`KNeighborsClassifier`: `k` is the number of neighbors; here `k=1`.
In [36]:
from sklearn.neighbors import KNeighborsClassifier
# Build the classifier configured with a single neighbor (n_neighbors=1).
knn = KNeighborsClassifier(n_neighbors=1)
In [38]:
# Fit the classifier on the training features and labels.
knn.fit(X_train, y_train)
Out[38]:
In [40]:
import numpy as np

# One hand-written sample with four feature values, stored as a 2-D array
# (a single row) because it is passed to knn.predict() below.
X_new = np.array([5, 2.9, 1, 0.2], ndmin=2)
print(X_new.shape)
In [42]:
# Classify the single new sample with the fitted model.
prediction = knn.predict(X_new)
print("Prediction: {}".format(prediction))
# Index target_names with the predicted label to show the class name
# instead of its integer code.
print("Predicted target name: {}".format(
    iris_data['target_names'][prediction]))
In [44]:
# Predict a label for every sample in the test split.
y_pred = knn.predict(X_test)
print("Test set predictions:\n {}".format(y_pred))
In [45]:
# Accuracy = fraction of test samples whose prediction equals the true label.
# The spec is ".2f" (two decimals); "2f" would only set a minimum width of 2
# and print the default six decimals.
print("Test set score: {:.2f}".format(np.mean(y_pred == y_test)))
In [47]:
# Same accuracy, computed by the classifier's own score() method.
# The spec is ".2f" (two decimals); "2f" would only set a minimum width of 2
# and print the default six decimals.
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))
In [48]:
# Whole workflow in one cell: split, fit, score.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# random_state=0 keeps the train/test selection the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris_data['data'], iris_data['target'], random_state=0)

# Classifier configured with a single neighbor, fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

# The spec is ".2f" (two decimals); "2f" would only set a minimum width of 2
# and print the default six decimals.
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))
In [ ]: