In [1]:
# Record the interpreter version in the notebook output so the run environment is visible.
import sys
print(sys.version)
In [2]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import time
import pandas as pd
import seaborn as sns
In [3]:
# Show the kernel's working directory; the relative paths used below resolve against it.
! pwd
In [4]:
# List the HW3 code directory that is put on sys.path in the next cell.
! ls ../../HW3/code/
In [5]:
import sys

# Directory (relative to the kernel's working directory) containing the HW3
# helper modules that the following cells import.
HW3_CODE_DIR = '../../HW3/code/'

# Only append when missing so that re-running this cell does not stack
# duplicate entries on sys.path.
if HW3_CODE_DIR not in sys.path:
    sys.path.append(HW3_CODE_DIR)
In [6]:
from pca import Pca, plot_fractional_reconstruction_error
In [7]:
from classification_base import MNIST_PATH
from mnist_helpers import mnist_training, mnist_testing
In [8]:
# Load the MNIST training images and labels with shuffling disabled.
train_X, train_y = mnist_training(shuffled=False)
# The test split is loaded further down via mnist_testing(shuffled=False).
In [9]:
# Size of the first axis of the training array.
X_num = train_X.shape[0]

# Build the PCA helper on the training data with 50 dimensions.
# center=False matches the "_uncentered" suffix of the files saved later;
# presumably it disables mean subtraction — confirm in pca.py.
train = Pca(
    train_X,
    y=train_y,
    dimensions=50,
    center=False,
)
In [10]:
# Run the Pca object's eigen computation. Later cells read train.eigenvals and
# train.eigenvects; presumably this call populates them — confirm in pca.py.
train.calc_eigen_stuff()
In [11]:
# Peek at the first ten eigenvalues.
train.eigenvals[:10]
Out[11]:
In [12]:
# Display the Pca object's sigma attribute.
# NOTE(review): presumably the matrix that was eigendecomposed — confirm in pca.py.
train.sigma
Out[12]:
In [13]:
# Display the value returned by the Pca helper's sum_of_top_eigenvalues().
# NOTE(review): how many eigenvalues count as "top" is decided inside pca.py — confirm there.
train.sum_of_top_eigenvalues()
Out[13]:
In [14]:
# List the contents of the ../figures directory.
! ls '../figures'
In [15]:
# Compute the fractional reconstruction error on the Pca object.
# The next cell reads train.fractional_reconstruction_df; presumably this call
# creates it — confirm in pca.py.
train.fractional_reconstruction_error()
In [16]:
# Show the first two rows of the reconstruction-error table.
train.fractional_reconstruction_df.head(2)
Out[16]:
In [17]:
# Plot the fractional reconstruction error over the start/stop window 0-50.
plot_fractional_reconstruction_error(
    train,
    title='Fractional Reconstruction Error',
    start=0,
    stop=50,
)
Out[17]:
In [18]:
# Same plot as the previous cell, but with the window starting at 2 instead of 0.
plot_fractional_reconstruction_error(
    train,
    title='Fractional Reconstruction Error',
    start=2,
    stop=50,
)
Out[18]:
In [19]:
# Sanity check: how many eigenvalues the first-50 slice actually holds.
len(train.eigenvals[:50])
Out[19]:
In [20]:
# Sum of the first 50 eigenvalues.
first_50_eigenvalue_slice = slice(0, 50)
np.sum(train.eigenvals[first_50_eigenvalue_slice])
Out[20]:
In [21]:
# First 50 eigenvalues rounded to one decimal place. np.round returns a new
# array, so train.eigenvals itself is left untouched.
np.round(train.eigenvals[:50], decimals=1)
Out[21]:
In [22]:
# Line plot of the first 50 eigenvalues (rounded to one decimal) against their
# 1-based position.
fig, ax = plt.subplots(1, 1, figsize=(3.5, 3))

# Render y tick labels as integers with thousands separators.
ax.get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))

plot_x = range(1, 51)
plot_y = np.round(train.eigenvals.copy(), 1)[0:50]
ax.plot(plot_x, plot_y, linestyle='--', marker='o', color='b')

ax.set_title('first 50 eigenvalues')
# The x axis is the eigenvalue's position and the y axis is its value.
# Previously xlabel was set twice, so 'value' replaced 'eigenvalue' on the
# x axis and the y axis was left unlabelled.
ax.set_xlabel('eigenvalue')
ax.set_ylabel('value')
fig.tight_layout()
# Uncomment to write the figure to disk:
#fig.savefig('../figures/Q-1-2-1_first_50_eigenvalues.pdf')
In [23]:
# Persist the eigenvalues of the uncentered PCA as a .npy file.
np.save(
    file="../data/Q-0_PCA_eigenvalues_uncentered.npy",
    arr=train.eigenvals,
)
In [24]:
# Persist the eigenvectors of the uncentered PCA as a .npy file.
np.save(
    file="../data/Q-0_PCA_eigenvectors_uncentered.npy",
    arr=train.eigenvects,
)
In [25]:
# Check the dimensions of the eigenvector array that was just saved.
train.eigenvects.shape
Out[25]:
In [26]:
# Display the Pca object's repr.
train
Out[26]:
In [27]:
# Persist the Pca object to disk (pickle is imported in the notebook's import cell).
# The context manager closes the file handle as soon as the dump finishes;
# passing a bare open(...) left the handle unclosed, so the write was only
# flushed whenever the interpreter happened to collect it.
with open('../data/PCA_training_data_uncentered.pickle', "wb") as pickle_file:
    pickle.dump(obj=train, file=pickle_file)
In [28]:
# Long-format listing of the pickle files in ../data, to confirm the dump above was written.
! ls -l ../data/*.pickle
In [29]:
# Transform the data currently held by the Pca object (the training set at this
# point in the notebook) using 50 components.
X_transformed_50 = train.transform_all_digits_down(n_components=50)
In [30]:
# Check the dimensions of the transformed training data.
X_transformed_50.shape
Out[30]:
In [31]:
# Persist the 50-component representation of the training data.
np.save(
    file='../data/X_transformed_by_50_components_uncentered.npy',
    arr=X_transformed_50,
)
In [32]:
# Load the MNIST test images and labels with shuffling disabled.
X_test, y_test = mnist_testing(shuffled=False)
In [33]:
# Invariant: the test images and test labels have the same first-axis length.
assert X_test.shape[0] == y_test.shape[0]
In [34]:
# Check the dimensions of the test image array.
X_test.shape
Out[34]:
In [35]:
# Swap the test images into the existing Pca object so the next transform call
# runs on the test set.
# NOTE(review): this mutates `train` in place. After this cell, `train.X` is the
# test data, so re-running the earlier training transform cell would silently
# transform the test set instead. Presumably transform_all_digits_down reads
# self.X and reuses the eigenvectors computed from the training data — confirm
# in pca.py. A separate object for the test set would avoid the hidden state.
train.X = X_test
In [36]:
# Confirm the Pca object now holds an array with the test set's dimensions.
train.X.shape
Out[36]:
In [37]:
# Transform the data now held by the Pca object (the test set, after the
# train.X reassignment above) using 50 components.
X_test_transformed_50 = train.transform_all_digits_down(n_components=50)
In [38]:
# Check the dimensions of the transformed test data.
X_test_transformed_50.shape
Out[38]:
In [39]:
# Persist the 50-component representation of the test data.
np.save(
    file='../data/X_test_transformed_by_50_components_uncentered.npy',
    arr=X_test_transformed_50,
)
In [ ]: