In [332]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from math import *
In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's "whitegrid" style for the plots that follow.
sns.set_style(style="whitegrid")
In [5]:
def findClosestCentroids(X, centroids):
    """Assign every example to its nearest centroid (squared Euclidean distance).

    Parameters
    ----------
    X : array-like, shape (m, n)
        One example per row.
    centroids : array-like, shape (K, n)
        One centroid per row.

    Returns
    -------
    numpy.ndarray, shape (m, 1)
        Integer index (0 .. K-1) of the closest centroid for each example.
        Ties resolve to the lowest index, as ``np.argmin`` does.
    """
    # Work in floating point so integer inputs (e.g. uint8 pixels) cannot wrap
    # around in the subtraction below.
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(centroids, dtype=float)
    # Broadcast (m, 1, n) against (1, K, n) to get every example/centroid pair.
    diff = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    dist = np.sum(diff ** 2, axis=2)
    # argmin yields a platform integer, so indices above 255 are preserved
    # (the previous uint8 buffer silently wrapped them).
    return np.argmin(dist, axis=1).reshape(-1, 1)
def computeCentroids(X, idx, K):
    """Compute each cluster's centroid as the mean of the examples assigned to it.

    Parameters
    ----------
    X : array-like, shape (m, n)
        One example per row.
    idx : array-like, shape (m,) or (m, 1)
        Cluster index of every example.
    K : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray, shape (K, n)
        Row ``k`` is the mean of the examples with ``idx == k``. A cluster with
        no examples yields a row of NaN.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so a column vector of indices works as a row mask; a (m, 1)
    # boolean mask cannot index the rows of an (m, n) array.
    labels = np.asarray(idx).ravel()
    # Start from NaN so empty clusters are reported explicitly instead of via a
    # "mean of empty slice" RuntimeWarning.
    centroids = np.full((K, X.shape[1]), np.nan)
    for k in range(K):
        members = X[labels == k]
        if len(members):
            centroids[k] = members.mean(axis=0)
    return centroids
def runKMeans(X, initial_centroids, max_iters, plot_progress = False):
    """Run K-Means for a fixed number of iterations.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, n)
        One example per row.
    initial_centroids : array-like, shape (K, n)
        Starting centroids. They are copied, so the caller's array is not modified.
    max_iters : int
        Number of assignment/update iterations to perform.
    plot_progress : bool, optional
        When True, call ``plotProgressKMeans`` once per iteration.

    Returns
    -------
    tuple
        ``(centroids, idx)``: the final (K, n) centroids and the 1-D array
        holding the centroid index assigned to each example in the last iteration.
    """
    m = X.shape[0]
    K = len(initial_centroids)
    # Float copy: integer starting centroids must be able to move to fractional
    # positions, and the caller's array must stay untouched.
    centroids = np.array(initial_centroids, dtype=float)
    previous_centroids = centroids
    # 1-D like the per-iteration result, so the return shape does not depend on max_iters.
    idx = np.zeros(m, dtype=int)
    # Run K-Means
    for i in range(max_iters):
        # Assignment step: closest centroid for every example
        idx = np.asarray(findClosestCentroids(X, centroids)).ravel()
        if plot_progress:
            plotProgressKMeans(X, centroids, previous_centroids, idx, K, i)
        previous_centroids = centroids
        # Update step: move each centroid to the mean of its members
        new_centroids = computeCentroids(X, idx, K)
        # A cluster that lost all its points has an undefined (NaN) mean. Keep
        # its previous position: np.argmin returns the first NaN entry, so a NaN
        # centroid would capture every example on the next assignment step.
        undefined = np.isnan(new_centroids).any(axis=1)
        new_centroids[undefined] = centroids[undefined]
        centroids = new_centroids
    return ( centroids, idx )
def plotDataPoints(X, idx, K):
    """Scatter the first two columns of X, coloured by cluster membership.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, >=2)
        Points to plot; only columns 0 and 1 are used.
    idx : array-like, shape (m,) or (m, 1)
        Cluster index of every point.
    K : int
        Number of clusters. Any K is supported; for K == 3 the colours are
        red, green and blue, as before.
    """
    import colorsys

    labels = np.asarray(idx).ravel()
    # One RGB colour per cluster, evenly spaced around the hue wheel.
    palette = [colorsys.hsv_to_rgb(k / K, 1.0, 1.0) for k in range(K)]
    # Always three channels per point (the old (m, K) buffer only matched the
    # RGB triples when K happened to be 3). Unmatched labels stay black.
    colors = np.zeros((len(labels), 3))
    for k in range(K):
        colors[labels == k] = palette[k]
    plt.scatter(X[:,0], X[:,1], s=60, c= colors)
def plotProgressKMeans(X, centroids, previous_centroids, idx, K, i):
    """Plot one K-Means iteration: the points, the centroids and their last move.

    ``i`` (the iteration number) is accepted for the caller's convenience but
    is not used for drawing.
    """
    # Points coloured by their current assignment
    plotDataPoints(X, idx, K)
    # Current centroids as black "X" markers, without a connecting line
    black = [0, 0, 0]
    plt.plot(centroids[:, 0], centroids[:, 1], lw=0, marker='X', c=black, ms=10)
    # One segment per cluster from the current centroid back to its previous position
    for k in range(K):
        drawLine(centroids[k, :], previous_centroids[k, :])
def drawLine(p1, p2):
    """Draw a black segment of width 3 between the 2-D points ``p1`` and ``p2``."""
    xs = [p1[0], p2[0]]
    ys = [p1[1], p2[1]]
    plt.plot(xs, ys, color=[0,0,0], lw=3)
def kMeansInitCentroids(X, K):
    """Pick K distinct rows of X, uniformly at random, as initial centroids.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, n)
        Candidate examples, one per row.
    K : int
        Number of centroids to draw; must not exceed ``m``.

    Returns
    -------
    numpy.ndarray, shape (K, n)
        The selected rows of X.

    Raises
    ------
    ValueError
        If K is larger than the number of rows in X.
    """
    m = len(X)
    if K > m:
        raise ValueError("K (%d) exceeds the number of examples (%d)" % (K, m))
    # Shuffle ALL row indices; permuting only range(K) would always return the
    # first K rows of X in a different order.
    randIdx = np.random.permutation(m)
    return X[randIdx[0:K]]
In [6]:
# Load the Data
# Read the MATLAB file "ex7data2.mat" and keep its "X" entry as the working dataset.
mat = scipy.io.loadmat("ex7data2.mat")
X = mat["X"]
In [7]:
# Select an initial set of centroids:
# three hand-picked 2-D starting positions, one per row.
initial_centroids = np.array([[3, 3],
[6, 2],
[8, 5]])
# Number of clusters (matches the number of rows above)
K = 3
In [8]:
# Find the closest centroids for the examples using the initial_centroids.
# `.T[0]` turns the (m, 1) column returned by findClosestCentroids into a 1-D array.
idx = findClosestCentroids(X,initial_centroids).T[0]
# Show the first three assignments shifted by one
# (presumably to compare against 1-based reference values — confirm).
idx[0:3] + 1
Out[8]:
In [9]:
# Centroids computed after the initial closest-centroid assignment:
# the mean of the points assigned to each of the K clusters.
computeCentroids(X, idx, K)
Out[9]:
In [10]:
# Settings for running K-Means
# Number of clusters
K = 3
# Iteration budget
max_iter = 10
In [11]:
# Inspect the first initial centroid (row 0)
initial_centroids[0,:]
Out[11]:
In [12]:
# Run K-Means from the hand-picked initial centroids, plotting every iteration
plt.figure(figsize=(12,8))
# Use the configured iteration budget (max_iter) instead of repeating the literal 10
runKMeans(X,initial_centroids,max_iters=max_iter,plot_progress=True)
Out[12]:
In [13]:
# Read the image into a (height, width, channels) array.
# scipy.misc.imread no longer exists in current SciPy releases, so use
# matplotlib's reader. plt.imread returns PNG data as floats in [0, 1]; convert
# back to uint8 in 0-255 so the following cells (nbytes, division by 255) see
# the same kind of array as before.
A = np.round(plt.imread("bird_small.png") * 255).astype(np.uint8)
# Reshape to a 2-D matrix: one row per pixel, one column per channel
A_2d = A.reshape( ( A.shape[0] * A.shape[1], A.shape[2] ) )
In [14]:
# Size of the image array in bits (nbytes is in bytes, hence the factor 8)
A.nbytes * 8
Out[14]:
In [15]:
# Divide by 255 so that all values are in the range 0 - 1
# (assumes the image holds 0-255 channel values — confirm against the loader).
# NOTE: both names are rebound to their scaled versions, so running this cell a
# second time scales the data again.
A = A / 255
A_2d = A_2d / 255
In [16]:
# Settings for the image-compression run:
# number of clusters (palette colours) and number of K-Means iterations
K = 16;
max_iters = 100;
In [17]:
# Display the flattened, scaled pixel matrix (one row per pixel)
A_2d
Out[17]:
In [18]:
# Initialize the centroids: take K rows of the pixel matrix as starting colours
init_centroids = kMeansInitCentroids(A_2d, K)
In [19]:
# Number of columns (values per pixel) in the flattened image
A_2d.shape[1]
Out[19]:
In [20]:
# Run K-Means on the pixel matrix. Returns the final centroids and, for every
# pixel, the index of its centroid. plot_progress keeps its default (False).
centroids, idx = runKMeans(A_2d, init_centroids ,max_iters)
In [21]:
# Print the learned palette (one centroid per row) ...
print("New 16 Different Colors\n")
print(centroids)
# ... and the centroid index assigned to every pixel
print("\nIndices of each pixel\n")
print(idx)
In [22]:
# Recover image with new colors: replace every pixel by its assigned centroid.
# Indexing the centroid table with the whole index array does the per-pixel
# lookup in a single vectorized step.
X_recovered = centroids[idx]
# Restore the original image shape. The previous line referenced the misspelled
# name `X_recoverd`, which raised NameError.
X_recovered = X_recovered.reshape(A.shape)
In [23]:
# Show the original and the compressed image next to each other
fig = plt.figure()
# add_axes takes a [left, bottom, width, height] rectangle.
# NOTE(review): ax2 starts at left=1, i.e. to the right of ax1's full-width
# rectangle; presumably this relies on the inline backend cropping the output
# to the drawn content — confirm before saving the figure to a file.
ax1 = fig.add_axes([0,0,1,1])
ax2 = fig.add_axes([1,0,1,1])
# Left: original image
ax1.imshow(A)
ax1.set_title("Before Using K-Means Algorithm")
# Right: image rebuilt from the centroid colours
ax2.imshow(X_recovered)
ax2.set_title("After Using K-Means Algorithm")
Out[23]:
In [88]:
def featureNormalize(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, n)
        One example per row.

    Returns
    -------
    tuple
        ``(X_norm, mu, sigma)``: the normalized data, the per-column mean and
        the per-column standard deviation (``X.std(axis=0)``) of the input.
        Constant columns (``sigma == 0``) are centred but left unscaled, so they
        come out as zeros instead of NaN.
    """
    # Compute the statistics once and reuse them for the return value.
    mu = np.mean(X, axis=0)
    sigma = X.std(axis=0)
    # Avoid 0/0 on constant columns: divide those by 1 instead.
    scale = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu) / scale
    return (X_norm, mu, sigma)
def pca(X):
    """Return the SVD of ``(1 / m) * X.T @ X`` for an (m, n) matrix X.

    The result is exactly what ``np.linalg.svd`` returns for that n-by-n
    matrix. X is used as given; no centring or scaling happens here.
    """
    n_examples, _ = X.shape
    covariance = (1 / n_examples) * np.dot(X.T, X)
    return np.linalg.svd(covariance)
def projectData(X, U, K):
    """Project the rows of X onto the first K columns of U.

    Returns ``np.dot(X, U[:, 0:K])``, i.e. one K-dimensional row per row of X.
    """
    leading_columns = U[:, :K]
    return np.dot(X, leading_columns)
def recoverData(Z, U, K):
    """Map projected rows Z back using the first K columns of U.

    Returns ``np.dot(Z, U[:, 0:K].T)``, i.e. one row of length ``U.shape[0]``
    per row of Z.
    """
    leading_columns = U[:, :K]
    return np.dot(Z, leading_columns.T)
In [94]:
# Load the PCA example dataset: the "X" entry of "ex7data1.mat"
mat = scipy.io.loadmat('ex7data1.mat')
X = mat['X']
# m = number of rows, n = number of columns of X
m,n = X.shape
In [52]:
# Scatter the raw 2-D dataset as markers only (lw=0 hides the connecting line)
plt.figure(figsize=(12,8))
# `data=` is only meaningful when x/y are given as string keys into it; with
# the arrays passed directly it was redundant, so it is dropped.
plt.plot(X[:,0],X[:,1], lw=0, marker="o",ms = 10)
Out[52]:
In [64]:
# Features should be normalized before running PCA:
# X_norm is the standardized data, mu / sigma the per-column mean and std of X
X_norm, mu, sigma = featureNormalize(X)
# Finding eigenvectors using PCA algorithm (U, S, V as returned by np.linalg.svd)
U, S, V = pca(X_norm)
In [54]:
# Display the raw SVD result for the normalized data
pca(X_norm)
Out[54]:
In [55]:
# Draw the eigenvectors centered at mean of data.
plt.figure(figsize=(12,8))
# Each segment starts at the data mean `mu` and follows one column of U,
# scaled by 1.5 times the matching entry of S
drawLine(mu, mu + 1.5 * S[0] * U[:,0])
drawLine(mu, mu + 1.5 * S[1] * U[:,1])
# Overlay the un-normalized data points as markers only
plt.plot(X[:,0],X[:,1], lw=0, marker="o",ms = 10)
Out[55]:
In [56]:
# Plot the normalized dataset (X_norm, as returned by featureNormalize)
plt.figure(figsize=(12,8))
plt.plot(X_norm[:,0],X_norm[:,1], lw=0, marker="o",ms = 10)
Out[56]:
In [76]:
# Project the normalized data onto the first column of U (K = 1)
Z = projectData(X_norm, U, 1)
# Print the first 5 projected values as a flat row
print(Z[0:5].T[0])
In [89]:
# Recover an approximation of the normalized data from its 1-D projection.
# NOTE: this rebinds `X_recovered`, which an earlier cell used for the compressed image.
X_recovered = recoverData(Z, U, 1)
# Print the first 5 recovered rows
print(X_recovered[0:5,:])
In [115]:
# Draw lines connecting the projected points to the original points
plt.figure(figsize=(12,8))
# Recovered (projected) points: hollow red markers
plt.plot(X_recovered[:,0], X_recovered[:,1],
         lw=0, markerfacecolor= "white", markeredgewidth=2 ,markeredgecolor="red" ,marker="o",ms = 10)
# Normalized original points: hollow blue markers
plt.plot(X_norm[:,0], X_norm[:,1],
         lw=0,markerfacecolor= "white", markeredgewidth=2 , markeredgecolor="blue", marker="o",ms = 10)
# Pair the rows directly instead of counting up to the notebook-global `m`,
# which may be stale when cells are run out of order.
for original_point, recovered_point in zip(X_norm, X_recovered):
    drawLine(original_point, recovered_point)
In [639]:
def magic_display(matrix = None, cmap= 'gray'):
    """Show the rows of a matrix as a grid of small images.

    Every row is reshaped to ``example_height x example_width``, divided by its
    own maximum, transposed and pasted onto a canvas of ones with a 2-pixel
    padding between tiles. The canvas is drawn with ``plt.imshow``.

    Parameters
    ----------
    matrix : numpy.ndarray, optional
        Either a 2-D array with one example per row or a 1-D array holding a
        single example. When omitted, up to 100 randomly chosen rows of the
        notebook-global ``X`` are shown.
    cmap : str, optional
        Colormap passed to ``plt.imshow``.
    """
    if matrix is None:
        # Sample from the current global X; size the permutation from X itself
        # rather than from the global `m`, which may belong to another dataset.
        rand_indces = np.random.permutation(X.shape[0])[0:100]
        X_dis = X[rand_indces]
    else:
        X_dis = matrix
    # Treat a single 1-D example as a 1-row matrix so one code path handles
    # both cases (the old 1-D branch sliced `X_dis[1:]` and could not reshape).
    X_dis = np.atleast_2d(np.asarray(X_dis))
    m_test, n_test = X_dis.shape

    # Width and height of one example tile
    example_width = int(round(sqrt(n_test)))
    example_height = int(round(n_test / example_width))
    # Grid dimensions
    display_rows = floor(sqrt(m_test))
    display_cols = ceil(m_test / display_rows)
    # Padding between tiles
    pad = 2
    # Tiles are pasted transposed: the first canvas axis advances by
    # example_width per step of `i` (0 .. display_cols-1) and the second by
    # example_height per step of `j` (0 .. display_rows-1). The canvas must be
    # sized accordingly, otherwise non-square grids overflow it.
    display_array = np.ones((
        pad + display_cols * (example_width + pad),
        pad + display_rows * (example_height + pad)
    ))
    for count in range(m_test):
        # Same visiting order as a nested loop with `i` outer and `j` inner
        i, j = divmod(count, display_rows)
        example = X_dis[count].reshape(example_height, example_width)
        # Top-left corner of this tile on the canvas
        ex_x_range = pad + j * (example_height + pad)
        ex_y_range = pad + i * (example_width + pad)
        # Scale by the example's own maximum and paste it transposed
        display_array[ex_y_range : ex_y_range + example_width,
                      ex_x_range : ex_x_range + example_height] = np.divide(example, np.max(example)).T
    # Get rid of the grid
    plt.grid(False)
    plt.imshow(display_array, cmap= cmap)
In [640]:
# Load the faces dataset: the "X" entry of "ex7faces.mat".
# NOTE: this rebinds the notebook-global `X` used by the earlier cells.
mat = scipy.io.loadmat("ex7faces.mat")
X = mat['X']
In [641]:
# One example of X (the first row) ...
print(X[0])
print("\n")
# ... and the overall shape of the dataset
print(X.shape)
In [642]:
# Show the first 100 rows of X as a grid of images
plt.figure(figsize=(12,8))
magic_display(X[0:100,:])
In [626]:
# Standardize the faces data; rebinds X_norm, mu and sigma from the earlier example
X_norm, mu, sigma = featureNormalize(X)
In [627]:
# Finding eigenvectors using PCA algorithm (rebinds U, S, V for the faces data)
U, S, V = pca(X_norm)
In [628]:
# Visualize the top 36 eigenvectors found:
# the first 36 columns of U, transposed so that each one is a row for magic_display
plt.figure(figsize=(12,8))
magic_display(U[:,0:36].T, cmap= 'gist_ncar')
In [629]:
# Dimension Reduction for Faces: keep the first K = 150 columns of U
K = 150
Z = projectData(X_norm, U, K)
In [630]:
# Shape of the projected data (rows, K)
Z.shape
Out[630]:
In [631]:
# Rebuild the faces from their K-dimensional projection for visualization
K = 150
X_rec = recoverData(Z, U, K)
In [632]:
# Shape of the recovered data
X_rec.shape
Out[632]:
In [634]:
# Compare the first 100 faces with their reconstructions, side by side
plt.figure(figsize=(12,8))
# Left panel: rows of X
left_panel = plt.subplot(1,2,1)
left_panel.set_title("Original faces")
magic_display(X[0:100,:])
# Right panel: rows of X_rec
right_panel = plt.subplot(1,2,2)
right_panel.set_title("Recovered faces")
magic_display(X_rec[0:100,:])
In [ ]: