In [2]:
# Load scikit-learn's bundled toy datasets.
from sklearn import datasets
In [3]:
# Each loader returns a dict-like Bunch; the cells below read its
# .data, .target, .feature_names and .target_names attributes.
iris = datasets.load_iris()
digits = datasets.load_digits()
In [4]:
print(digits)
In [4]:
print(type(iris));
print(type(digits))
In [5]:
print(iris.data[:6]) # training data is stored in data member of iris dataset
In [6]:
print(iris.target)
In [7]:
iris.data.shape
Out[7]:
In [8]:
iris.target.shape
Out[8]:
In [9]:
# print feature
print(iris.feature_names)
In [10]:
print(iris.target_names)
In [11]:
print(type(iris.data), type(iris.target))
In [12]:
# loading featured and responsed into X and y
X = iris.data
y = iris.target
print(X.shape);
print(y.shape)
In [13]:
from sklearn.neighbors import KNeighborsClassifier
In [14]:
# instantiate the KN
knn = KNeighborsClassifier(n_neighbors=2)
# training the model
knn.fit(X, y)
Out[14]:
In [15]:
#predict the value [5,4,3,2]
knn.predict([5,4,3,2])
Out[15]:
In [16]:
knn.predict([[5,4,3,2], [1,2,3,5]])
Out[16]:
In [17]:
from sklearn.linear_model import LogisticRegression
In [18]:
lrm = LogisticRegression()
In [19]:
lrm.fit(X, y)
Out[19]:
In [20]:
lrm.predict([[5,4,3,2], [1,2,3,5]])
Out[20]:
In [21]:
from sklearn import metrics
In [22]:
# Evaluate LogisticRegression on its own TRAINING data.
# NOTE(review): training accuracy is optimistic -- it rewards memorization
# and should not be used for model selection; the train/test split below
# gives the honest estimate.
lrm = LogisticRegression()
lrm.fit(X, y)
y_pred = lrm.predict(X)
# training-set accuracy
metrics.accuracy_score(y, y_pred)
Out[22]:
In [23]:
# KNN with K=1 on training data: each point's nearest neighbor is
# (generally) itself, so the score is trivially perfect and uninformative.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X, y)
y_pred = knn.predict(X)
# training-set accuracy
metrics.accuracy_score(y, y_pred)
Out[23]:
In [24]:
# KNN with K=5: a larger neighborhood smooths the decision boundary,
# so the training score drops below K=1's trivial maximum.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
y_pred = knn.predict(X)
# training-set accuracy
metrics.accuracy_score(y, y_pred)
Out[24]:
In [25]:
# splitting the data
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)
In [26]:
# LogisticRegression: train on the training split, score on the held-out
# test split -- an honest estimate of out-of-sample accuracy.
lrm = LogisticRegression()
lrm.fit(X_train, y_train)
y_pred = lrm.predict(X_test)
# test-set accuracy
metrics.accuracy_score(y_test, y_pred)
Out[26]:
In [27]:
# KNN with K=1, evaluated on the held-out test split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
# test-set accuracy
metrics.accuracy_score(y_test, y_pred)
Out[27]:
In [28]:
# KNN with K=5, evaluated on the held-out test split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
# test-set accuracy
metrics.accuracy_score(y_test, y_pred)
Out[28]:
By training on one split and testing on a held-out split, we can say that KNN performs better with K = 5 than with K = 1.
In [29]:
# Let's have a loop which will check for all possible value of K
accuracy = []
K = range(1,26)
for k in K:
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
# testing accracy
ac = metrics.accuracy_score(y_test, y_pred)
accuracy.append(ac)
In [30]:
# now plotting it
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(K, accuracy)
Out[30]:
In [31]:
# Accuracy is flat and high for K roughly between 6 and 16; pick K = 6
# and refit on ALL the data for the final model (reasonable for deployment).
# NOTE(review): the score below is computed on the same data the model was
# fit on, so it is a training accuracy, not an out-of-sample estimate --
# the train/test numbers above are the honest measure.
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X, y)
y_pred = knn.predict(X)
# training-set accuracy
metrics.accuracy_score(y, y_pred)
Out[31]:
In [1]:
# NOTE(review): stale cell -- its execution count (1) is lower than every
# cell above, so it was run out of order and will not reproduce under
# Restart & Run All in this position relative to its recorded count.
# It also re-dumps the entire digits Bunch; safe to delete before sharing.
print(digits)
In [ ]: