CSAL4243: Introduction to Machine Learning

Muhammad Mudassir Khan (mudasssir.khan@ucp.edu.pk)

Lecture 9: Logistic Regression and kNN Examples

Overview

University Admission Dataset
- K - Nearest Neighbor (kNN) Classifier
- Logistic-Regression
Iris Flower Dataset
- K - Nearest Neighbor (kNN) Classifier
- Logistic-Regression
Resources
Credits

University Admission Dataset

Predict whether a student gets admitted to a university based on their scores in two exams administered by the university. You have historical data on previous applicants, each of whom was admitted or rejected based on their scores in these two exams.

K - Nearest Neighbor (kNN) Classifier



In [ ]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors

# Load the admission records: two exam scores per applicant plus the
# admission label.
df = pd.read_csv('datasets/exam_dataset1.csv', encoding='utf-8')
n_neighbors = 5  # number of neighbours consulted for each prediction

X = np.array(df[['exam1','exam2']])
y = np.array(df[['admission']]).ravel()  # flatten the label column to 1-D

h = .02  # step size in the mesh
# NOTE(review): if the exam scores span tens of points, a 0.02 step produces a
# very large grid and slow kNN predictions - consider a coarser step.

# Create color maps (light shades for the regions, bold for the data points)
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# The mesh [x_min, x_max]x[y_min, y_max] depends only on the data, not on the
# weighting scheme, so it is built once instead of on every loop iteration.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
mesh_points = np.c_[xx.ravel(), yy.ravel()]

for weights in ['uniform', 'distance']:
    # Create a k-nearest-neighbours classifier and fit it to the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)
    print(clf.score(X,y))  # scored on the same data the model was fitted on

    # Plot the decision boundary by assigning a color to each mesh point.
    Z = clf.predict(mesh_points)

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlabel('Exam 1')
    plt.ylabel('Exam 2')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("2-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()

Logistic Regression



In [ ]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import linear_model

# Admission data: the two exam scores are the features, `admission` the label.
df = pd.read_csv('datasets/exam_dataset1.csv', encoding='utf-8')

X = np.array(df[['exam1','exam2']])
y = np.array(df[['admission']]).ravel()

h = .02  # step size in the mesh

# Build the logistic-regression model, train it on the whole dataset and
# report its score on that same data.
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(X, y)
print(logreg.score(X,y))

# Evaluation grid: the bounding box of the data widened by 0.5 on every side
# and sampled every `h`; each grid point gets a predicted class (its color).
x_min = X[:, 0].min() - .5
x_max = X[:, 0].max() + .5
y_min = X[:, 1].min() - .5
y_max = X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = logreg.predict(grid_points).reshape(xx.shape)

# Colored decision regions first, then the training points on top of them.
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Paired)

plt.xlabel('Exam 1')
plt.ylabel('Exam 2')

# Clamp the view to the grid and hide the tick marks.
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()

Iris Flower Dataset

Using sepal length and width, predict the type of flower.

K - Nearest Neighbor (kNN) Classifier



In [ ]:

    
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 1  # number of neighbours consulted for each prediction

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
                      # avoid this ugly slicing by using a two-dim dataset
y = iris.target

h = .02  # step size in the mesh

# Create color maps (light shades for the regions, bold for the data points)
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# The mesh [x_min, x_max]x[y_min, y_max] depends only on the data, not on the
# weighting scheme, so it is built once instead of on every loop iteration.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
mesh_points = np.c_[xx.ravel(), yy.ravel()]

for weights in ['uniform', 'distance']:
    # Create a k-nearest-neighbours classifier and fit it to the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)
    print(clf.score(X,y))  # scored on the same data the model was fitted on

    # Plot the decision boundary by assigning a color to each mesh point.
    Z = clf.predict(mesh_points)

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()

Logistic Regression



In [ ]:

    
# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target

h = .02  # step size in the mesh

# C is the inverse of the regularization strength, so a large value such as
# 1e5 leaves the model almost unregularized.
logreg = linear_model.LogisticRegression(C=1e5)

# Fit the logistic-regression model and report its score on the data it was
# fitted on.  The labels live in `Y` in this cell; scoring against a
# lower-case `y` only worked when an earlier cell had left one in the kernel.
logreg.fit(X, Y)
print(logreg.score(X, Y))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# Clamp the view to the mesh and hide the tick marks.
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()



In [ ]:

    
# Display the coefficients learned by the fitted logistic-regression model.
logreg.coef_



In [ ]:

    
# Display the intercept term(s) learned by the fitted logistic-regression model.
logreg.intercept_

Regularization Example



In [230]:

    
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors
from matplotlib.colors import ListedColormap
from sklearn import linear_model



In [231]:

    
# Load the regression example: column 'x' is the single input, 'y' the target.
df_reg = pd.read_csv('datasets/example2.csv', encoding='utf-8')

feature_frame = df_reg[['x']]
target_frame = df_reg[['y']]

X = np.array(feature_frame)         # stays 2-D: one row per sample, one column
y = np.array(target_frame).ravel()  # flattened to a 1-D vector



In [232]:

    
# Scatter the raw (x, y) samples on a fresh figure.
fig, ax = plt.subplots()
ax.scatter(X, y)
plt.show()



In [233]:

    
# Inspect the dimensions of the feature matrix (rows, columns).
X.shape









    Out[233]:





(5, 1)



In [234]:

    
# Add the 2nd, 3rd and 4th powers of 'x' as extra columns x_2, x_3 and x_4.
for power in (2, 3, 4):
    df_reg["x_%d" % power] = df_reg["x"]**power



In [235]:

    
# Feature matrix for the polynomial fit: x and its powers up to the 4th.
feature_columns = ['x', 'x_2', 'x_3', 'x_4']
X = np.array(df_reg[feature_columns])



In [236]:

    
# Fit a ridge (L2-regularized linear) regression with its default settings on
# the polynomial features, then print its score on that same data.
reg = linear_model.Ridge().fit(X, y)
print(reg.score(X,y))









    



0.703331855374



In [237]:

    
# 100 evenly spaced x values from 0 to 8 (both ends included) for drawing the
# fitted curve.
x_line = np.linspace(start=0, stop=8, num=100)



In [238]:

    
# Expand the 1-D grid into the polynomial feature columns [x, x^2, x^3, x^4]
# used to train the model.  The name `x_line` is reused for the expanded
# matrix (later cells read `x_line[:, 0]`), so when this cell is re-run the
# base grid is recovered from column 0; without that guard a second run would
# stack powers of the already-expanded matrix into a 3-D array.
x_base = x_line[:, 0] if np.ndim(x_line) == 2 else x_line
x_line = np.column_stack([x_base ** power for power in range(1, 5)])



In [239]:

    
# Evaluate the fitted regression model on the polynomial features of the grid.
y_line = reg.predict(x_line)



In [240]:

    
# Display the intercept learned by the fitted regression model.
reg.intercept_









    Out[240]:





2.5592944159434556



In [241]:

    
# Overlay the fitted curve on the original samples; column 0 of both matrices
# holds the plain (first-power) x values.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], y)
ax.plot(x_line[:, 0], y_line)
plt.show()

Resources

Course website: https://w4zir.github.io/ml17s/

Course resources

Credits

Raschka, Sebastian. Python machine learning. Birmingham, UK: Packt Publishing, 2015. Print.

Andrew Ng, Machine Learning, Coursera

Scikit Learn

David Kaleko