# Importing Libraries

``````

In [332]:

import pandas as pd
import numpy as np
import seaborn as sns
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from math import *

``````
``````

In [4]:

%matplotlib inline
sns.set_style(style="whitegrid")

``````

# K-Means Clustering

## Functions

``````

In [5]:

def findClosestCentroids(X, centroids):
c = np.ones((len(X), 1), dtype= np.uint8)
for i in range(len(X)):

# Computing distance between training example and centroids
dist = centroids - X[i]
dist = dist ** 2
dist = np.sum(dist, axis=1)

# Finding closest Centroid
argmin = np.argmin(dist)
c[i] = argmin

# Returning index of the closest centroids
return c

def computeCentroids(X, idx, K):

# Creating new centroids matrix
centroids = np.zeros((K, X.shape[1]))

for i in range(K):

# Finding trainig sets with same class
same_idx = (idx == i)

# Computing mean of the points
centroids[i] = np.mean(X[same_idx], axis=0)

# Returining new moved centroids
return centroids

def runKMeans(X, initial_centroids, max_iters, plot_progress = False):

#Initialize values
m, n = X.shape
K = len(initial_centroids)
centroids = initial_centroids
previous_centroids = centroids
idx = np.zeros((m,1), dtype= np.uint8)

# Run K-Means
for i in range(max_iters):

# Finding closest centroids
idx = findClosestCentroids(X, centroids).T[0]

if(plot_progress):
plotProgressKMeans(X, centroids, previous_centroids, idx, K, i)
previous_centroids = centroids

# Given the memberships, compute new centroids
centroids = computeCentroids(X, idx, K)

return ( centroids, idx )

def plotDataPoints(X, idx, K):
colors = np.zeros(( len(idx) , K))
colors[ idx == 0 ] = [ 1,0,0 ]
colors[ idx == 1 ] = [ 0,1,0 ]
colors[ idx == 2 ] = [ 0,0,1 ]
plt.scatter(X[:,0], X[:,1], s=60, c= colors)

def plotProgressKMeans(X, centroids, previous_centroids, idx, K, i):

# Scattering Data Points
plotDataPoints(X, idx, K)

# Plotting new Centroids as "X"
plt.plot(centroids[:,0], centroids[:,1], lw= 0 ,marker='X', c=[0,0,0], ms= 10)

# Plotting Progress of Algorithm
for i in range(K):

# Draw line from point1 to point2
drawLine(centroids[i, :], previous_centroids[i, :])

def drawLine(p1, p2):
plt.plot([p1[0], p2[0]], [p1[1], p2[1]], color=[0,0,0], lw=3)

def kMeansInitCentroids(X, K):
randIdx = np.random.permutation(K)
return X[randIdx[0:K]]

``````

## Load The Data

``````

In [6]:

# Load the Data
X = mat["X"]

``````

### Test "findClosestCentroids" , "computeMean" Functions

``````

In [7]:

#Select an initial set of centroids
initial_centroids = np.array([[3, 3],
[6, 2],
[8, 5]])
K = 3

``````
``````

In [8]:

#Find the closest centroids for the examples using the initial_centroids
idx = findClosestCentroids(X,initial_centroids).T[0]
idx[0:3] + 1

``````
``````

Out[8]:

array([1, 3, 2], dtype=uint8)

``````
``````

In [9]:

# Centroids computed after initial finding of closest centroids
computeCentroids(X, idx, K)

``````
``````

Out[9]:

array([[ 2.42830111,  3.15792418],
[ 5.81350331,  2.63365645],
[ 7.11938687,  3.6166844 ]])

``````

### Test K-Means Algorithm

``````

In [10]:

#Settings for running K-Means
K = 3
max_iter = 10

``````
``````

In [11]:

initial_centroids[0,:]

``````
``````

Out[11]:

array([3, 3])

``````
``````

In [12]:

# Running K-Means function with inital_centroids
plt.figure(figsize=(12,8))
runKMeans(X,initial_centroids,max_iters=10,plot_progress=True)

``````
``````

Out[12]:

(array([[ 1.95399466,  5.02557006],
[ 3.04367119,  1.01541041],
[ 6.03366736,  3.00052511]]),
array([0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0], dtype=uint8))

``````

## K-Means on Pixels

### Load the Image

``````

In [13]:

# Convert Real image to 3d RGB matrix

# Reshaping Matrix to 2D Matrix
A_2d = A.reshape( ( A.shape[0] * A.shape[1], A.shape[2] ) )

``````
``````

In [14]:

# Computing memory usage of the matrix
A.nbytes * 8

``````
``````

Out[14]:

393216

``````

### Running K-Means Algorithm

``````

In [15]:

# Divide by 255 so that all values are in the range 0 - 1
A = A / 255
A_2d = A_2d / 255

``````
``````

In [16]:

# Initial Values of K and max_iters
K = 16;
max_iters = 100;

``````
``````

In [17]:

A_2d

``````
``````

Out[17]:

array([[ 0.85882353,  0.70588235,  0.40392157],
[ 0.90196078,  0.7254902 ,  0.45490196],
[ 0.88627451,  0.72941176,  0.43137255],
...,
[ 0.25490196,  0.16862745,  0.15294118],
[ 0.22745098,  0.14509804,  0.14901961],
[ 0.20392157,  0.15294118,  0.13333333]])

``````
``````

In [18]:

# initialize the centroids
init_centroids = kMeansInitCentroids(A_2d, K)

``````
``````

In [19]:

A_2d.shape[1]

``````
``````

Out[19]:

3

``````
``````

In [20]:

# Running Algorithm
centroids, idx = runKMeans(A_2d, init_centroids ,max_iters)

``````
``````

In [21]:

print("New 16 Different Colors\n")
print(centroids)
print("\nIndices of each pixel\n")
print(idx)

``````
``````

New 16 Different Colors

[[ 0.73184329  0.5776567   0.31854035]
[ 0.53817969  0.44357819  0.38935128]
[ 0.81442319  0.73466258  0.72813665]
[ 0.35137993  0.4044133   0.54620444]
[ 0.0911979   0.09848923  0.08841637]
[ 0.2001637   0.19427292  0.18855306]
[ 0.38681812  0.30688788  0.24777841]
[ 0.60893969  0.43881145  0.18726419]
[ 0.97682748  0.89736743  0.71040853]
[ 0.90000596  0.54178437  0.20853448]
[ 0.94918348  0.81282138  0.55267559]
[ 0.81762228  0.7049097   0.481371  ]
[ 0.96970439  0.95789617  0.85502162]
[ 0.64636497  0.579549    0.53681137]
[ 0.54072645  0.67696561  0.84444873]
[ 0.89266188  0.67452403  0.37811129]]

Indices of each pixel

[15 11 15 ...,  5  5  5]

``````
``````

In [22]:

# Recover image with new colors
X_recovered = np.array([centroids[i] for i in idx])
X_recovered = X_recoverd.reshape(A.shape)

``````
``````

In [23]:

fig = plt.figure()

ax1.imshow(A)
ax1.set_title("Before Using K-Means Algorithm")

ax2.imshow(X_recovered)
ax2.set_title("After Using K-Means Algorithm")

``````
``````

Out[23]:

<matplotlib.text.Text at 0x7773fa6668>

``````

# Principal Component Analysis

## Functions

``````

In [88]:

def featureNormalize(X):
X_norm  = X - np.mean(X,axis=0)
X_norm = X_norm / X.std(axis=0)

return (X_norm, np.mean(X,axis=0), X.std(axis=0))

def pca(X):
m, n = X.shape
sigma = ( 1 / m ) * np.dot(X.T,X)

return np.linalg.svd(sigma)

def projectData(X, U, K):
return np.dot(X, U[:,0:K])

def recoverData(Z, U, K):
return np.dot(Z, U[:,0:K].T)

``````

### Load the Data

``````

In [94]:

X = mat['X']
m,n = X.shape

``````

### Plot the Data

``````

In [52]:

plt.figure(figsize=(12,8))
plt.plot(X[:,0],X[:,1],data=X, lw=0, marker="o",ms = 10)

``````
``````

Out[52]:

[<matplotlib.lines.Line2D at 0x7773d73f60>]

``````

### Use PCA Algorithm

``````

In [64]:

# We should Feature normalize before using PCA Algorithm
X_norm, mu, sigma = featureNormalize(X)

# Finding eigenvectors using PCA algorithm
U, S, V = pca(X_norm)

``````
``````

In [54]:

pca(X_norm)

``````
``````

Out[54]:

(array([[-0.70710678, -0.70710678],
[-0.70710678,  0.70710678]]),
array([ 1.73553038,  0.26446962]),
array([[-0.70710678, -0.70710678],
[-0.70710678,  0.70710678]]))

``````
``````

In [55]:

# Draw the eigenvectors centered at mean of data.
plt.figure(figsize=(12,8))
drawLine(mu, mu + 1.5 * S[0] * U[:,0])
drawLine(mu, mu + 1.5 * S[1] * U[:,1])
plt.plot(X[:,0],X[:,1], lw=0, marker="o",ms = 10)

``````
``````

Out[55]:

[<matplotlib.lines.Line2D at 0x7773ff65f8>]

``````
``````

In [56]:

# Plot the normalized dataset (returned from pca)
plt.figure(figsize=(12,8))
plt.plot(X_norm[:,0],X_norm[:,1], lw=0, marker="o",ms = 10)

``````
``````

Out[56]:

[<matplotlib.lines.Line2D at 0x77768d0278>]

``````
``````

In [76]:

# Project data into K dimension
Z = projectData(X_norm, U, 1)

# Printing first 5 projected value
print(Z[0:5].T[0])

``````
``````

[ 1.49631261 -0.92218067  1.22439232  1.64386173  1.2732206 ]

``````
``````

In [89]:

# Recovering Data
X_recovered = recoverData(Z, U, 1)

# Printing first 5 recoverd values
print(X_recovered[0:5,:])

``````
``````

[[-1.05805279 -1.05805279]
[ 0.65208021  0.65208021]
[-0.86577611 -0.86577611]
[-1.16238578 -1.16238578]
[-0.90030292 -0.90030292]]

``````
``````

In [115]:

# Draw lines connecting the projected points to the original points
plt.figure(figsize=(12,8))

plt.plot(X_recovered[:,0], X_recovered[:,1],
lw=0, markerfacecolor= "white", markeredgewidth=2 ,markeredgecolor="red" ,marker="o",ms = 10)
plt.plot(X_norm[:,0], X_norm[:,1],
lw=0,markerfacecolor= "white", markeredgewidth=2 , markeredgecolor="blue", marker="o",ms = 10)

for i in range(m):
drawLine(X_norm[i,:], X_recovered[i,:])

``````
``````

``````

## Face Dataset

### Functions

``````

In [639]:

def magic_display(matrix = None, cmap= 'gray'):
if matrix is None:
# selecting 100 random rows of the X
rand_indces = np.random.permutation(m)[0:100]
X_dis = X[rand_indces]
else:
X_dis = matrix

if( len(X_dis.shape) > 1 ):
m_test,n_test = X_dis.shape
axis_bound = 1
else:
m_test = 1
n_test = X_dis.shape[0]
axis_bound = 0

# each number width , height in plot
example_width = int(round(sqrt(n_test)))
example_height = int(round( n_test / example_width ))

# number of numbers to show in plot
display_rows = floor(sqrt(m_test))
display_cols = ceil(m_test / display_rows )

# padding between numbers

# intilazation array for holding previos 100 random numbers
display_array = np.ones((
pad + display_rows * ( example_height + pad ),
pad + display_cols * ( example_width + pad )
))
count = 0;

for i in range(display_cols):
for j in range(display_rows):
if( count >= m_test ):
break

# max_val of each row in X_dis
max_val = np.max( X_dis[count : count+1], axis= axis_bound)

# Starting x,y point of numbers shape in array
ex_x_range = pad + ( j ) * ( example_height + pad )
ex_y_range = pad + ( i ) * ( example_width + pad )

if(m_test > 1):
ex_arr = X_dis[ count : count + 1 , 0:].reshape(example_height , example_width)
else:
ex_arr = X_dis[1:].reshape(example_height , example_width)

# Setting values
display_array[ ex_y_range : ex_y_range + example_width,
ex_x_range : ex_x_range + example_height] = np.divide(ex_arr , max_val).T
count += 1

# Get rod of grid
plt.grid(False)
plt.imshow(display_array, cmap= cmap)

``````

### Load the Data

``````

In [640]:

X = mat['X']

``````
``````

In [641]:

# One example of X
print(X[0])
print("\n")
print(X.shape)

``````
``````

[ -37.86631387  -45.86631387  -53.86631387 ..., -110.86631387 -111.86631387
-99.86631387]

(5000, 1024)

``````

### Display Face

``````

In [642]:

plt.figure(figsize=(12,8))
magic_display(X[0:100,:])

``````
``````

``````

### PCA Algorithm

``````

In [626]:

X_norm, mu, sigma = featureNormalize(X)

``````
``````

In [627]:

# Finding eigenvectors using PCA algorithm
U, S, V = pca(X_norm)

``````
``````

In [628]:

# Visualize the top 36 eigenvectors found
plt.figure(figsize=(12,8))
magic_display(U[:,0:36].T, cmap= 'gist_ncar')

``````
``````

``````
``````

In [629]:

# Dimension Reduction for Faces
K = 150
Z = projectData(X_norm, U, K)

``````
``````

In [630]:

Z.shape

``````
``````

Out[630]:

(5000, 150)

``````
``````

In [631]:

# Visualization of Faces after PCA Dimension Reduction
K = 150
X_rec = recoverData(Z, U, K)

``````
``````

In [632]:

X_rec.shape

``````
``````

Out[632]:

(5000, 1024)

``````
``````

In [634]:

plt.figure(figsize=(12,8))

plt.subplot(1,2,1)
plt.subplot(1,2,1).set_title("Original faces")
magic_display(X[0:100,:])

plt.subplot(1,2,2)
plt.subplot(1,2,2).set_title("Recovered faces")
magic_display(X_rec[0:100,:])

``````
``````

``````
``````

In [ ]:

``````