Import libraries and set up figure output


In [1]:
%matplotlib inline

import os
from math import sqrt
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Figure settings used by the plotting cells.
save_fig = True       # when True, figures are also written to fig_directory
fig_size = (6, 4.5)   # handed to plt.figure(figsize=...)
fig_dpi = 200         # handed to fig.savefig(dpi=...)

# Output location for saved figures; create it on first run.
fig_directory = '../figures'
figures_dir_missing = not os.path.exists(fig_directory)
if figures_dir_missing:
    os.makedirs(fig_directory)

Sampling functions


In [2]:
N = 100  # default number of samples per draw

def sample_x1(size=N):
    """Draw `size` samples of X1 from a normal distribution.

    X1 has mean 3 and variance 9 (standard deviation sqrt(9)).
    """
    return np.random.normal(loc=3,scale=sqrt(9),size=size)

def sample_x2(size=N, x1=None):
    """Draw samples of X2 = 0.5 * X1 + noise.

    The noise is normal with mean 4 and variance 4 (standard deviation
    sqrt(4)).

    Parameters
    ----------
    size : number of samples to draw. Only used when `x1` is None.
    x1 : optional array of existing X1 samples. When given, X2 is built
        from exactly these values, so (x1[i], result[i]) are paired
        observations and the result has the same shape as `x1`.

    When `x1` is omitted, a fresh X1 sample is drawn internally and then
    discarded. The returned values are therefore NOT paired with any X1
    sample the caller drew separately.
    """
    if x1 is None:
        x1 = sample_x1(size)
    else:
        x1 = np.asarray(x1)
        size = x1.shape  # one noise term per supplied X1 value
    return 0.5 * x1 + np.random.normal(loc=4,scale=sqrt(4),size=size)

Sample data


In [3]:
# Draw X1 once and derive X2 from that same draw, so that column i of `data`
# is one paired observation (x1_i, x2_i). Calling sample_x2() with no
# arguments would draw its own, unrelated X1 internally, which leaves
# x1_data and x2_data statistically independent.
x1_data = sample_x1()
x2_noise = np.random.normal(loc=4,scale=sqrt(4),size=x1_data.shape)
x2_data = 0.5 * x1_data + x2_noise  # same model as sample_x2

# Row 0 holds X1, row 1 holds X2.
data = np.vstack((x1_data,x2_data))

Calculate statistics and the eigendecomposition of the covariance matrix


In [4]:
# Summary statistics of the samples. Each row of `data` is one variable:
# the mean is taken along axis 1 and np.cov is called with its defaults.
stats = dict()
stats['mean'] = np.mean(data, axis=1)
stats['cov'] = np.cov(data)

# Eigen-decomposition of the covariance matrix;
# cov_eig_vectors[:, j] is the eigenvector paired with cov_eig_values[j].
cov_eig_values, cov_eig_vectors = np.linalg.eig(stats['cov'])

Plot original data with eigenvectors


In [5]:
# Scatter plot of the raw samples with the covariance eigenvectors overlaid.
fig = plt.figure(figsize=fig_size)
plt.title('Original')
plt.xlim(-15.0, 15.0)
plt.ylim(-15.0, 15.0)
plt.xlabel('X1')
plt.ylabel('X2')
plt.scatter(x1_data, x2_data)

# Both arrows start at the sample mean. Broadcasting multiplies column j of
# cov_eig_vectors (the j-th eigenvector) by cov_eig_values[j], so row 0 holds
# the X1 components of the two arrows and row 1 the X2 components.
mean_x1 = stats['mean'][0]
mean_x2 = stats['mean'][1]
scaled_eig_vectors = cov_eig_vectors * cov_eig_values
plt.quiver([mean_x1, mean_x1],
           [mean_x2, mean_x2],
           [scaled_eig_vectors[0, 0], scaled_eig_vectors[0, 1]],
           [scaled_eig_vectors[1, 0], scaled_eig_vectors[1, 1]],
           color='r', width=0.005)
plt.grid()
plt.show()

if save_fig:
    original_png = '{0}/original.png'.format(fig_directory)
    fig.savefig(original_png, dpi=fig_dpi)


Plot transformed data


In [6]:
# Principal axes: eigenvectors (as columns) ordered by descending eigenvalue.
U = cov_eig_vectors[:,cov_eig_values.argsort()[::-1]]
UT = U.T

# Centre every observation on the sample mean, then project it onto the
# principal axes. This is a single matrix product instead of a per-column
# loop, so it no longer needs Python 2's `xrange`. The result is stored
# under a new name: `data` keeps the raw samples, and re-running this cell
# gives the same figure instead of transforming already-transformed data.
data_centered = data - stats['mean'][:, np.newaxis]
data_transformed = np.dot(UT, data_centered)

fig = plt.figure(figsize=fig_size)
plt.title('Transformed')
plt.xlim(-15.0, 15.0)
plt.ylim(-15.0, 15.0)
plt.xlabel('Transformed X1')
plt.ylabel('Transformed X2')
plt.scatter(data_transformed[0], data_transformed[1])
plt.grid()
plt.show()
if save_fig:
    fig.savefig('{0}/transformed.png'.format(fig_directory), dpi=fig_dpi)



In [6]: