PCA visualizations of different classification datasets

  • Concentric n-dimensional spheres

In [1]:
import numpy as np

In [13]:
# Generating n-dimensional concentric spheres data
def generateNDSpheres(N=3, C=3, M_C=5000, seed=None):
    """Sample labelled points lying on C concentric N-dimensional spheres.

    Class ``c`` (0-based) is placed on the sphere of radius ``100 * (c + 1)``
    centred at the origin, so every row of ``X`` has Euclidean norm equal to
    the radius of its class.

    Parameters
    ----------
    N : int
        Number of dimensions (columns of ``X``).
    C : int
        Number of classes, i.e. number of spheres.
    M_C : int
        Number of examples per class.
    seed : int or None, optional
        When given, all random draws come from a private
        ``np.random.RandomState(seed)``: the result is reproducible and the
        global NumPy random state is left untouched. When ``None`` (default)
        the global ``np.random`` state is used, exactly as before.

    Returns
    -------
    X : numpy.ndarray, shape (C * M_C, N)
        Point coordinates; rows are grouped by class in increasing order.
    y : numpy.ndarray, shape (C * M_C, 1)
        Class index of each row, stored as float.

    Notes
    -----
    The squared coordinates are drawn one dimension at a time as a random
    fraction of whatever is left of ``R**2``; the last dimension takes the
    remainder. The first dimension therefore gets the largest expected share
    of the radius, so the points are NOT uniformly distributed over the
    sphere surface.
    """
    # Legacy global stream by default; a private, seeded stream on request.
    rng = np.random if seed is None else np.random.RandomState(seed)

    M = C*M_C
    X = np.zeros((M,N))
    y = np.zeros((M,1))
    for c in range(C):
        rows = slice(c*M_C, (c+1)*M_C)
        R = 100*(c+1)
        # Running per-point sum of the squared coordinates assigned so far.
        aggSum = np.zeros(M_C)
        for n in range(N):
            remaining = R*R - aggSum
            if n == N-1:
                # Last coordinate absorbs the remainder so the norm is exactly R.
                X2 = remaining
            else:
                X2 = remaining * rng.rand(M_C)
            aggSum += X2
            # Random +/-1 sign per point so all orthants are populated.
            signs = 2*rng.randint(2, size=M_C) - 1
            X[rows, n] = signs * np.sqrt(X2)
        y[rows] = c
    return X,y

# Build the demo data set: 4 concentric spheres in 3-D, 3000 points per class (12000 rows in total).
SX, Sy = generateNDSpheres(N=3, C=4, M_C=3000)

In [14]:
# Peek at the first two samples and their labels, one array per line.
print(SX[:2], Sy[:2], sep='\n')


[[-99.24523312  -9.21683828   8.0891035 ]
 [ 53.81215253  54.00885856 -64.70931491]]
[[ 0.]
 [ 0.]]

In [15]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
# Select matplotlib's "notebook" backend for the figures created below.
%matplotlib notebook
# Turn on pyplot's interactive mode.
plt.ion()

# Raw 3-D samples coloured by class label, before any dimensionality reduction.
fig = plt.figure(figsize=(9, 6))
chart3d = fig.add_subplot(1, 1, 1, projection='3d')
c3d = chart3d.scatter(
    *(SX[:, axis] for axis in range(3)),  # first three columns -> x, y, z
    c=Sy[:, 0],
    cmap=cm.tab10
)
chart3d.set(xlabel='x', ylabel='y', zlabel='z')
fig.colorbar(c3d)  # maps colours back to class indices
plt.show()



In [16]:
from sklearn.decomposition import PCA

# Project the samples onto their first two principal components.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(SX)
# explained_variance_ratio_ holds one fraction per retained component.
print('Explained variance ratio per principal component: {}'.format(pca.explained_variance_ratio_))


Explained variation per principal component: , [ 0.50387268  0.25269765]

In [17]:
import matplotlib.cm as cm
# Re-issues the backend selection and interactive mode already set in the 3-D plot cell.
# NOTE(review): likely redundant once that cell has run — confirm before removing.
%matplotlib notebook
plt.ion()

# Samples in the plane of the first two principal components, coloured by class label.
fig = plt.figure(figsize=(9,6))
chart2d = fig.add_subplot(111)
c2d = chart2d.scatter(pca_result[:,0], pca_result[:,1], c=Sy[:,0], cmap=cm.tab10)
# The axes are principal components, not the original x/y coordinates.
chart2d.set_xlabel('PC 1')
chart2d.set_ylabel('PC 2')
chart2d.set_title('Concentric spheres projected onto the first two principal components')
fig.colorbar(c2d, label='class')
plt.show()