notebook.community

Edit and run



In [1]:

    
# Chapter 3
# Classifying images
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split



In [2]:

    
# Load the digits dataset that ships with scikit-learn (sklearn.datasets).
# The cells below read its `.data` (feature rows) and `.target` (labels) attributes.
digits = load_digits()



In [3]:

    
# Report the dimensions of the feature matrix (samples x features)
print("Image Data Shape {}".format(np.shape(digits.data)))









    



Image Data Shape (1797, 64)



In [4]:

    
# Report the dimensions of the label vector (one label per sample)
print("Label Data Shape {}".format(np.shape(digits.target)))









    



Label Data Shape (1797,)



In [5]:

    
# Peek at the first three feature rows (each row is one flattened image)
digits.data[0:3]









    Out[5]:





array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
        15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
        12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
         0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
        10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.],
       [ 0.,  0.,  0., 12., 13.,  5.,  0.,  0.,  0.,  0.,  0., 11., 16.,
         9.,  0.,  0.,  0.,  0.,  3., 15., 16.,  6.,  0.,  0.,  0.,  7.,
        15., 16., 16.,  2.,  0.,  0.,  0.,  0.,  1., 16., 16.,  3.,  0.,
         0.,  0.,  0.,  1., 16., 16.,  6.,  0.,  0.,  0.,  0.,  1., 16.,
        16.,  6.,  0.,  0.,  0.,  0.,  0., 11., 16., 10.,  0.,  0.],
       [ 0.,  0.,  0.,  4., 15., 12.,  0.,  0.,  0.,  0.,  3., 16., 15.,
        14.,  0.,  0.,  0.,  0.,  8., 13.,  8., 16.,  0.,  0.,  0.,  0.,
         1.,  6., 15., 11.,  0.,  0.,  0.,  1.,  8., 13., 15.,  1.,  0.,
         0.,  0.,  9., 16., 16.,  5.,  0.,  0.,  0.,  0.,  3., 13., 16.,
        16., 11.,  5.,  0.,  0.,  0.,  0.,  3., 11., 16.,  9.,  0.]])



In [6]:

    
# Draw the first five samples side by side; each feature row is reshaped
# back into an 8x8 grid before being rendered in grayscale.
plt.figure(figsize=(20, 4))
for slot, (pixels, digit) in enumerate(zip(digits.data[:5], digits.target[:5]), start=1):
    plt.subplot(1, 5, slot)
    plt.imshow(pixels.reshape(8, 8), cmap=plt.cm.gray)
    plt.title("Training: {}\n".format(digit), fontsize=20)



In [7]:

    
# Hold out 25% of the samples for testing; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=0,
)



In [8]:

    
# Logistic regression: build the classifier with default settings and fit it
# on the training split. The bare name on the last line makes the cell
# display the fitted estimator.
lreg_model = LogisticRegression().fit(X_train, y_train)
lreg_model









    Out[8]:





LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)



In [9]:

    
# Predict a label for every sample in the held-out test split.
# NOTE(review): y_pred is not referenced by any later cell visible in this
# notebook; the accuracy cell calls lreg_model.score(X_test, y_test) instead.
# Confirm whether y_pred is still needed.
y_pred = lreg_model.predict(X_test)



In [10]:

    
# Evaluate the fitted logistic regression on the held-out test split
# and print the result rounded to two decimals.
score = lreg_model.score(X=X_test, y=y_test)
print("Logistic regression model score: {:.2f}".format(score))









    



Logistic regression model score: 0.95



In [11]:

    
# Now we'll do it using SVM
from sklearn.svm import SVC



In [13]:

    
# Linear-kernel SVM: fit on the training split, then print its score
# on the held-out test split.
svc_linear_kernel = SVC(kernel='linear').fit(X_train, y_train)
print("SVC with linear kernel score ---> {:.2f}".format(
    svc_linear_kernel.score(X_test, y_test)
))









    



SVC with linear kernel score ---> 0.97



In [14]:

    
# Polynomial-kernel SVM: fit on the training split, then print its score
# on the held-out test split.
svc_poly_kernel = SVC(kernel='poly').fit(X_train, y_train)
print("SVC with polynomial kernel score ---> {:.2f}".format(
    svc_poly_kernel.score(X_test, y_test)
))









    



SVC with polynomial kernel score ---> 0.98



In [ ]: