Importing Libraries

``````

In [1]:

import pandas as pd
import numpy as np
import seaborn as sns
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from math import *

``````
``````

In [2]:

%matplotlib inline
sns.set_style(style="whitegrid")

``````

K-Means Clustering

Functions

``````

In [3]:

def findClosestCentroids(X, centroids):
    """Assign every example in X to its nearest centroid.

    Parameters
    ----------
    X : (m, n) array of training examples.
    centroids : (K, n) array of current centroid positions.

    Returns
    -------
    (m, 1) uint8 array whose i-th entry is the index of the centroid
    closest to X[i] under squared Euclidean distance (ties go to the
    lowest index, matching np.argmin).

    Improvement over the original: the per-example Python loop is replaced
    by a single broadcast computation of all pairwise distances.
    """
    # Pairwise differences, shape (m, K, n), then squared distances (m, K).
    diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    sq_dists = np.sum(diffs ** 2, axis=2)

    # Nearest-centroid index per example, reshaped to (m, 1) uint8 to
    # keep the original return contract.
    return np.argmin(sq_dists, axis=1).astype(np.uint8).reshape(-1, 1)

def computeCentroids(X, idx, K):
    """Recompute each centroid as the mean of its assigned examples.

    X   : (m, n) data matrix.
    idx : length-m array of cluster assignments with values in 0..K-1.
    K   : number of clusters.

    Returns a (K, n) array of updated centroid positions.
    """
    n = X.shape[1]
    new_centroids = np.zeros((K, n))

    for k in range(K):
        # All examples currently assigned to cluster k.
        members = X[idx == k]
        # Move centroid k to the mean of its members.
        new_centroids[k] = members.mean(axis=0)

    return new_centroids

def runKMeans(X, initial_centroids, max_iters, plot_progress = False):
    """Run K-Means for a fixed number of iterations.

    X                 : (m, n) data matrix.
    initial_centroids : (K, n) starting centroid positions.
    max_iters         : number of assign/update iterations to perform.
    plot_progress     : when True, plot the clusters and centroid movement
                        after each assignment step.

    Returns a tuple (centroids, idx): the final (K, n) centroid matrix and
    the length-m assignment array from the last iteration.
    """
    m = X.shape[0]
    K = len(initial_centroids)

    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros((m, 1), dtype=np.uint8)

    for iteration in range(max_iters):
        # Assignment step: label every example with its nearest centroid.
        idx = findClosestCentroids(X, centroids).T[0]

        if plot_progress:
            plotProgressKMeans(X, centroids, previous_centroids, idx, K, iteration)
            previous_centroids = centroids

        # Update step: move each centroid to the mean of its members.
        centroids = computeCentroids(X, idx, K)

    return (centroids, idx)

def plotDataPoints(X, idx, K):
    """Scatter the 2-D examples in X, colored by cluster assignment.

    X   : (m, 2) data matrix (only the first two columns are plotted).
    idx : length-m array of cluster indices in 0..K-1.
    K   : total number of clusters; fixes the color scale so colors stay
          stable across iterations.

    Generalization: the original hard-coded red/green/blue for clusters
    0-2 and left any further cluster black (and built a (m, K) color
    array that is only a valid RGB array when K == 3). Mapping the
    cluster index through a colormap supports any K.
    """
    plt.scatter(X[:, 0], X[:, 1], s=60,
                c=np.asarray(idx, dtype=float), cmap='hsv', vmin=0, vmax=K)

def plotProgressKMeans(X, centroids, previous_centroids, idx, K, i):
    """Visualize one K-Means iteration: the cluster-colored data points,
    the current centroids, and a line per centroid showing how it moved
    since the previous iteration. `i` is the iteration number."""
    # Cluster-colored scatter of the data.
    plotDataPoints(X, idx, K)

    # Current centroids drawn as large black 'X' markers.
    plt.plot(centroids[:, 0], centroids[:, 1],
             lw=0, marker='X', c=[0, 0, 0], ms=10)

    # Trace each centroid's movement. Loop variable renamed from `i`
    # to avoid shadowing the iteration-number parameter.
    for k in range(K):
        drawLine(centroids[k, :], previous_centroids[k, :])

def drawLine(p1, p2):
    """Draw a thick black line segment between two (x, y) points."""
    xs = [p1[0], p2[0]]
    ys = [p1[1], p2[1]]
    plt.plot(xs, ys, color=[0, 0, 0], lw=3)

def kMeansInitCentroids(X, K):
    """Pick K distinct random rows of X as the initial centroids.

    Bug fix: the original called np.random.permutation(K), which permutes
    only the indices 0..K-1, so the initial centroids were always drawn
    from the FIRST K examples. Permuting all row indices samples the K
    centroids from the whole dataset, as random initialization requires.
    """
    rand_idx = np.random.permutation(X.shape[0])
    return X[rand_idx[:K]]

``````

``````

In [4]:

X = mat["X"]

``````

Test "findClosestCentroids", "computeCentroids" Functions

``````

In [5]:

#Select an initial set of centroids
initial_centroids = np.array([[3, 3],
[6, 2],
[8, 5]])
K = 3

``````
``````

In [6]:

#Find the closest centroids for the examples using the initial_centroids
idx = findClosestCentroids(X,initial_centroids).T[0]
idx[0:3] + 1

``````
``````

Out[6]:

array([1, 3, 2], dtype=uint8)

``````
``````

In [7]:

# Centroids computed after initial finding of closest centroids
computeCentroids(X, idx, K)

``````
``````

Out[7]:

array([[ 2.42830111,  3.15792418],
[ 5.81350331,  2.63365645],
[ 7.11938687,  3.6166844 ]])

``````

Test K-Means Algorithm

``````

In [8]:

#Settings for running K-Means
K = 3
max_iter = 10

``````
``````

In [9]:

initial_centroids[0,:]

``````
``````

Out[9]:

array([3, 3])

``````
``````

In [10]:

# Run K-Means from initial_centroids, plotting progress at each iteration.
plt.figure(figsize=(12,8))
runKMeans(X,initial_centroids,max_iters=10,plot_progress=True)

``````
``````

Out[10]:

(array([[ 1.95399466,  5.02557006],
[ 3.04367119,  1.01541041],
[ 6.03366736,  3.00052511]]),
array([0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0], dtype=uint8))

``````

K-Means on Pixels

``````

In [11]:

# Convert Real image to 3d RGB matrix
# NOTE(review): `A` is not defined in any visible cell — presumably the
# image-loading call (scipy.io / scipy.misc) was lost; confirm before
# re-running.

# Reshape the (rows, cols, 3) image to a 2-D matrix with one RGB pixel
# per row: shape (rows * cols, 3).
A_2d = A.reshape( ( A.shape[0] * A.shape[1], A.shape[2] ) )

``````
``````

In [12]:

# Memory usage of the matrix in *bits*: A.nbytes already reports bytes,
# so multiplying by 8 gives bits (matches the 393216 output below).
# NOTE(review): drop the `* 8` if bytes were intended.
A.nbytes * 8

``````
``````

Out[12]:

393216

``````

Running K-Means Algorithm

``````

In [13]:

# Divide by 255 so that all values are in the range 0 - 1
# NOTE(review): this cell is not idempotent — re-running it divides by 255
# again. Safer to assign to new names (e.g. A_scaled = A / 255).
A = A / 255
A_2d = A_2d / 255

``````
``````

In [14]:

# Initial Values of K and max_iters
K = 16;
max_iters = 100;

``````
``````

In [15]:

A_2d

``````
``````

Out[15]:

array([[ 0.85882353,  0.70588235,  0.40392157],
[ 0.90196078,  0.7254902 ,  0.45490196],
[ 0.88627451,  0.72941176,  0.43137255],
...,
[ 0.25490196,  0.16862745,  0.15294118],
[ 0.22745098,  0.14509804,  0.14901961],
[ 0.20392157,  0.15294118,  0.13333333]])

``````
``````

In [16]:

# initialize the centroids
init_centroids = kMeansInitCentroids(A_2d, K)

``````
``````

In [17]:

A_2d.shape[1]

``````
``````

Out[17]:

3

``````
``````

In [18]:

# Running Algorithm
centroids, idx = runKMeans(A_2d, init_centroids ,max_iters)

``````
``````

In [19]:

print("New 16 Different Colors\n")
print(centroids)
print("\nIndices of each pixel\n")
print(idx)

``````
``````

New 16 Different Colors

[[ 0.54211338  0.44590449  0.39008539]
[ 0.61869607  0.44630056  0.18952853]
[ 0.94895101  0.81622935  0.5598811 ]
[ 0.64630156  0.57808429  0.53562768]
[ 0.89481123  0.6866671   0.3912539 ]
[ 0.40120942  0.3115505   0.24065909]
[ 0.97746058  0.89937912  0.71237278]
[ 0.96926174  0.95837401  0.85642977]
[ 0.90325651  0.54677216  0.21400069]
[ 0.20623404  0.19908224  0.19344558]
[ 0.73847194  0.58412733  0.32276739]
[ 0.81296464  0.70332831  0.48893581]
[ 0.09219838  0.09946448  0.08943195]
[ 0.34266777  0.3890417   0.51066556]
[ 0.52687934  0.65651239  0.82875407]
[ 0.81217121  0.73752391  0.73502511]]

Indices of each pixel

[4 4 4 ..., 9 9 9]

``````
``````

In [21]:

# Recover image with new colors
X_recovered = np.array([centroids[i] for i in idx])
X_recovered = X_recovered.reshape(A.shape)

``````
``````

In [22]:

# Compare the original image with its 16-color K-Means reconstruction.
# Fix: ax1/ax2 were never created in the original cell, so it raised
# NameError on a fresh kernel; create both axes explicitly.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))

ax1.imshow(A)
ax1.set_title("Before Using K-Means Algorithm")

ax2.imshow(X_recovered)
ax2.set_title("After Using K-Means Algorithm")

``````
``````

Out[22]:

<matplotlib.text.Text at 0x45e24bb1d0>

``````

Principal Component Analysis

Functions

``````

In [23]:

def featureNormalize(X):
    """Standardize each column of X to zero mean and unit variance.

    Returns (X_norm, mu, sigma) where mu and sigma are the per-column
    mean and (population, ddof=0) standard deviation, so callers can
    undo the normalization later.

    Improvement: the original recomputed np.mean and X.std twice each;
    they are now computed once and reused.
    """
    mu = np.mean(X, axis=0)
    sigma = X.std(axis=0)
    X_norm = (X - mu) / sigma
    return (X_norm, mu, sigma)

def pca(X):
    """Run PCA on X (assumed already feature-normalized).

    Forms the (n, n) covariance matrix sigma = X^T X / m and returns its
    singular value decomposition as a tuple (U, S, V): the columns of U
    are the principal directions and S holds the corresponding variances.
    """
    m = X.shape[0]
    covariance = X.T.dot(X) / m
    return np.linalg.svd(covariance)

def projectData(X, U, K):
    """Project X onto the first K principal directions (columns of U),
    reducing it from n to K dimensions."""
    U_reduce = U[:, :K]
    return X.dot(U_reduce)

def recoverData(Z, U, K):
    """Approximately reconstruct the original n-dimensional data from its
    K-dimensional projection Z using the first K columns of U."""
    U_reduce = U[:, :K]
    return Z.dot(U_reduce.T)

``````

``````

In [24]:

X = mat['X']
m,n = X.shape

``````

Plot the Data

``````

In [25]:

plt.figure(figsize=(12,8))
plt.plot(X[:,0],X[:,1],data=X, lw=0, marker="o",ms = 10)

``````
``````

Out[25]:

[<matplotlib.lines.Line2D at 0x45e283e940>]

``````

Use PCA Algorithm

``````

In [26]:

# We should Feature normalize before using PCA Algorithm
X_norm, mu, sigma = featureNormalize(X)

# Finding eigenvectors using PCA algorithm
U, S, V = pca(X_norm)

``````
``````

In [27]:

pca(X_norm)

``````
``````

Out[27]:

(array([[-0.70710678, -0.70710678],
[-0.70710678,  0.70710678]]),
array([ 1.73553038,  0.26446962]),
array([[-0.70710678, -0.70710678],
[-0.70710678,  0.70710678]]))

``````
``````

In [30]:

S

``````
``````

Out[30]:

array([ 1.73553038,  0.26446962])

``````
``````

In [31]:

U

``````
``````

Out[31]:

array([[-0.70710678, -0.70710678],
[-0.70710678,  0.70710678]])

``````
``````

In [32]:

mu

``````
``````

Out[32]:

array([ 3.98926528,  5.00280585])

``````
``````

In [29]:

# Draw the eigenvectors centered at mean of data.
plt.figure(figsize=(12,8))
drawLine(mu, mu + 1.5 * S[0] * U[:,0])
drawLine(mu, mu + 1.5 * S[1] * U[:,1])
plt.plot(X[:,0],X[:,1], lw=0, marker="o",ms = 10)

``````
``````

Out[29]:

[<matplotlib.lines.Line2D at 0x45ee2484e0>]

``````
``````

In [56]:

# Plot the normalized dataset (returned from pca)
plt.figure(figsize=(12,8))
plt.plot(X_norm[:,0],X_norm[:,1], lw=0, marker="o",ms = 10)

``````
``````

Out[56]:

[<matplotlib.lines.Line2D at 0x77768d0278>]

``````
``````

In [76]:

# Project data into K dimension
Z = projectData(X_norm, U, 1)

# Printing first 5 projected value
print(Z[0:5].T[0])

``````
``````

[ 1.49631261 -0.92218067  1.22439232  1.64386173  1.2732206 ]

``````
``````

In [89]:

# Recovering Data
X_recovered = recoverData(Z, U, 1)

# Printing first 5 recoverd values
print(X_recovered[0:5,:])

``````
``````

[[-1.05805279 -1.05805279]
[ 0.65208021  0.65208021]
[-0.86577611 -0.86577611]
[-1.16238578 -1.16238578]
[-0.90030292 -0.90030292]]

``````
``````

In [115]:

# Draw lines connecting the projected points to the original points
plt.figure(figsize=(12,8))

plt.plot(X_recovered[:,0], X_recovered[:,1],
lw=0, markerfacecolor= "white", markeredgewidth=2 ,markeredgecolor="red" ,marker="o",ms = 10)
plt.plot(X_norm[:,0], X_norm[:,1],
lw=0,markerfacecolor= "white", markeredgewidth=2 , markeredgecolor="blue", marker="o",ms = 10)

for i in range(m):
drawLine(X_norm[i,:], X_recovered[i,:])

``````
``````

``````

Face Dataset

Functions

``````

In [643]:

def magic_display(matrix = None, cmap= 'gray'):
    # Renders a grid of example images (faces here) from the rows of a
    # matrix; with no argument it shows 100 random rows of the global X.
    # NOTE(review): relies on globals `X`, `m`, and `pad` (tile padding);
    # `pad` is never defined anywhere in this notebook — presumably set in
    # a lost cell. Confirm before running.
    if matrix is None:
        # selecting 100 random rows of the X
        rand_indces = np.random.permutation(m)[0:100]
        X_dis = X[rand_indces]
    else:
        X_dis = matrix

    if( len(X_dis.shape) > 1 ):
        m_test,n_test = X_dis.shape
        axis_bound = 1
    else:
        # A single example passed as a 1-D vector.
        m_test = 1
        n_test = X_dis.shape[0]
        axis_bound = 0

    # each number width , height in plot
    example_width = int(round(sqrt(n_test)))
    example_height = int(round( n_test / example_width ))

    # number of numbers to show in plot
    display_rows = floor(sqrt(m_test))
    display_cols = ceil(m_test / display_rows )

    # Initialization of the array that holds the tiled examples.
    # NOTE(review): the shape expression is missing (np.ones(()) yields a
    # 0-d scalar array), so the slice assignments below would fail — the
    # intended shape is presumably
    # (pad + display_rows*(example_height+pad),
    #  pad + display_cols*(example_width+pad)); restore it.
    display_array = np.ones((
    ))
    count = 0;

    for i in range(display_cols):
        for j in range(display_rows):
            if( count >= m_test ):
                break

            # max_val of each row in X_dis (used to scale pixel values)
            max_val = np.max( X_dis[count : count+1], axis= axis_bound)

            # Starting x,y point of numbers shape in array
            ex_x_range = pad + ( j ) * ( example_height + pad )
            ex_y_range = pad + ( i ) * ( example_width + pad )

            if(m_test > 1):
                ex_arr = X_dis[ count : count + 1 , 0:].reshape(example_height , example_width)
            else:
                # NOTE(review): X_dis[1:] drops the first pixel of the
                # single example — X_dis[0:] (or just X_dis) looks intended.
                ex_arr = X_dis[1:].reshape(example_height , example_width)

            # Setting values
            display_array[ ex_y_range : ex_y_range + example_width,
                ex_x_range : ex_x_range + example_height] = np.divide(ex_arr , max_val).T
            count += 1

    # Get rid of the grid lines before showing the image.
    plt.grid(False)
    plt.imshow(display_array, cmap= cmap)

``````

``````

In [644]:

X = mat['X']

``````
``````

In [645]:

# One example of X
print(X[0])
print("\n")
print(X.shape)

``````
``````

[ -37.86631387  -45.86631387  -53.86631387 ..., -110.86631387 -111.86631387
-99.86631387]

(5000, 1024)

``````

Display Face

``````

In [646]:

plt.figure(figsize=(12,8))
magic_display(X[0:100,:])

``````
``````

``````

PCA Algorithm

``````

In [647]:

X_norm, mu, sigma = featureNormalize(X)

``````
``````

In [648]:

# Finding eigenvectors using PCA algorithm
U, S, V = pca(X_norm)

``````
``````

In [649]:

# Visualize the top 36 eigenvectors found
plt.figure(figsize=(12,8))
magic_display(U[:,0:36].T, cmap= 'gist_ncar')

``````
``````

``````
``````

In [650]:

# Dimension Reduction for Faces
K = 150
Z = projectData(X_norm, U, K)

``````
``````

In [651]:

Z.shape

``````
``````

Out[651]:

(5000, 150)

``````
``````

In [652]:

# Visualization of Faces after PCA Dimension Reduction
K = 150
X_rec = recoverData(Z, U, K)

``````
``````

In [653]:

X_rec.shape

``````
``````

Out[653]:

(5000, 1024)

``````
``````

In [654]:

plt.figure(figsize=(12,8))

plt.subplot(1,2,1)
plt.subplot(1,2,1).set_title("Original faces")
magic_display(X[0:100,:])

plt.subplot(1,2,2)
plt.subplot(1,2,2).set_title("Recovered faces")
magic_display(X_rec[0:100,:])

``````
``````

``````
``````

In [ ]:

``````