Must see: eigen-decomposition of a 2-D covariance matrix (PCA) — comparing theoretical eigenvectors of the true covariance with empirical eigenvectors estimated from samples.
In [ ]:
# Jupyter magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [ ]:
import numpy as np
import matplotlib.pyplot as plt  # used by the plotting cells below; was missing
from sklearn.preprocessing import StandardScaler

# Seed the global RNG so the sampled data (and hence every figure and
# printed value below) is reproducible across runs.
np.random.seed(42)

# True distribution: zero mean, fixed 2x2 covariance.
mu = np.zeros(2)
cov = np.array([[1.07, 0.63],
                [0.63, 0.64]])
print("Cov:", cov)

# Check eigen vectors #################################
# np.linalg.eig returns eigenvalues and the eigenVECTORS as COLUMNS.
theoretical_eigen_val, theoretical_eigen_vect = np.linalg.eig(cov)
print("Theoretical eigen vectors:", theoretical_eigen_vect)
print("Theoretical eigen values:", theoretical_eigen_val)

# Make samples ########################################
X = np.random.multivariate_normal(mean=mu, cov=cov, size=100)
#print("X:", X)

# Standardizing data ##################################
# PCA directions are driven by variance: a feature on a larger scale would
# dominate the covariance matrix, so each feature is centered to zero mean
# and scaled to unit variance before estimating the empirical covariance.
X = StandardScaler().fit_transform(X)

# Compute the covariance matrix #######################
empirical_cov = np.cov(X.T)
print("Empirical cov:", empirical_cov)

# Compute eigen vectors ###############################
empirical_eigen_val, empirical_eigen_vect = np.linalg.eig(empirical_cov)
print("Empirical eigen vectors:", empirical_eigen_vect)
print("Empirical eigen values:", empirical_eigen_val)

# Project data in the new space #######################
# Since eigenvectors are columns, X @ vect expresses each sample in the
# eigenvector basis (its principal-component coordinates).
projected_data = np.dot(X, empirical_eigen_vect)
# Plot ################################################
import matplotlib.pyplot as plt  # was never imported in this notebook

fig, ax = plt.subplots(figsize=(8, 8))
ax.plot(*X.T, '.r')
p1x = X[:, 0].min()
p2x = X[:, 0].max()

# np.linalg.eig stores eigenvectors as COLUMNS, so iterate the TRANSPOSE
# (the original iterated rows, which are not eigenvectors). Each arrow is
# scaled by the std of its own projected component rather than the mean
# of all stds, so arrow lengths reflect the spread along each direction.
for comp_std, axis in zip(projected_data.std(axis=0), empirical_eigen_vect.T):
    start, end = mu, mu + comp_std * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))

# Draw the full line spanned by each eigenvector. A line through the
# origin along (ex, ey) has slope ey/ex (the original used ex/ey, the
# inverse slope, which draws the wrong lines).
ex, ey = empirical_eigen_vect[:, 0]
ax.plot([p1x, p2x], [ey * p1x / ex, ey * p2x / ex], '-b')
ex, ey = empirical_eigen_vect[:, 1]
ax.plot([p1x, p2x], [ey * p1x / ex, ey * p2x / ex], ':b')
ax.axis('equal');
In [ ]:
# Plots each projected component (principal-component coordinate) against
# the sample index — presumably a quick sanity check that both components
# are centered; NOTE(review): a 2-D scatter of the two components
# (plt.plot(*projected_data.T, '.r')) may have been intended — confirm.
plt.plot(projected_data, '.r')