In [1]:
import numpy as np
import scipy.sparse as sp
from mnist import MNIST
In [2]:
from ridge_regression import Ridge, RidgeRegularizationPath
In [3]:
import pandas as pd
%matplotlib inline
import seaborn as sns
In [4]:
import matplotlib as mpl
In [5]:
# Load the raw MNIST images and labels from the local python-mnist data folder.
mndata = MNIST('./python-mnist/data')
train_ims, train_labels = mndata.load_training()
test_ims, test_labels = mndata.load_testing()

# Convert the images to NumPy arrays so they can be sliced and turned into
# sparse matrices.
train_ims = np.array(train_ims)
test_ims = np.array(test_ims)

# Binary regression target: 1 where the digit label is 2, otherwise 0.
# A vectorised comparison gives the same integer array as looping over every
# label in Python, without the per-element overhead.
train_istwo = (np.asarray(train_labels) == 2).astype(int)
test_istwo = (np.asarray(test_labels) == 2).astype(int)
In [6]:
# Use the first `num_pts` training rows for fitting and hold out every row
# after that for validation. With the standard 60,000-image MNIST training
# set, num_pts = 50000 leaves 10,000 validation points (not 1,000).
num_pts = 50000

# Design matrices are stored sparse.
train_X = sp.csc_matrix(train_ims[:num_pts])
val_X = sp.csc_matrix(train_ims[num_pts:])

# Wrapping the 1-D label vector in a list gives a single row; the transpose
# turns it into an (n, 1) sparse column that lines up with the rows of X.
train_y = sp.csc_matrix([train_istwo[:num_pts]]).T
val_y = sp.csc_matrix([train_istwo[num_pts:]]).T

print("train X, y shapes: {}, {}".format(train_X.shape, train_y.shape))
print("val X, y shapes: {}, {}".format(val_X.shape, val_y.shape))
In [7]:
# Inspect the dimensions of the sparse training design matrix.
train_X.shape
Out[7]:
In [8]:
# Dimensions of the full image array, before the train/validation split.
train_ims.shape
Out[8]:
In [ ]:
# Configure a regularization path over the training split, with the held-out
# rows supplied for validation. The meaning of lam_max / frac_decrease / steps
# is defined by ridge_regression.RidgeRegularizationPath (not shown here).
rp = RidgeRegularizationPath(
    train_X=train_X,
    train_y=train_y,
    lam_max=10,
    frac_decrease=0.5,
    steps=10,
    val_X=val_X,
    val_y=val_y,
)
In [ ]:
# Run the regularization path on `rp`. What gets fitted and recorded is
# determined by RidgeRegularizationPath.walk_path in ridge_regression.
rp.walk_path()
In [ ]:
# Deliberate failure: always raises AssertionError. Presumably a guard that
# stops "Run All" here so the single-fit cells below are only executed by
# hand -- TODO confirm, and consider removing before sharing the notebook.
assert False
In [ ]:
# Single ridge model on the training split (lam was 0.05 when running my HW).
result = Ridge(X=train_X, y=train_y, lam=1)
In [ ]:
# Solve for the ridge coefficients. Presumably this also populates the
# attributes read further down (e.g. result.y_preds) -- TODO confirm in
# ridge_regression.Ridge.
result.solve_coeffs()
In [ ]:
import analyze_ridge_results
# `imp` is deprecated and was removed in Python 3.12; importlib.reload is the
# supported replacement. Fall back to `imp` only on interpreters whose
# importlib does not provide reload.
try:
    from importlib import reload
except ImportError:
    from imp import reload
In [ ]:
# Default figure size (width, height) in inches for every plot that follows.
mpl.rcParams["figure.figsize"] = (4, 3)
In [ ]:
# Put each prediction next to its label so the two classes can be compared.
to_compare = pd.DataFrame(
    {
        "predictions": result.y_preds,
        # result.y is sparse; densify it and take its first column.
        "label": result.y.toarray()[:, 0],
    }
)
print(to_compare.head())
print(to_compare.shape)

# Distribution of the predictions for each label value, written out as a PDF.
# NOTE(review): the output filename spells "MINST"; left unchanged in case
# something else refers to the file by this name.
plot = sns.violinplot(data=to_compare, x="label", y="predictions", size=2)
plot.figure.savefig("MINST_regression_distributions.pdf")
In [ ]:
# Summarize the single fit. 0.30 is passed positionally; presumably it is the
# cutoff applied to the regression output to call a digit a "2" -- TODO
# confirm against analyze_ridge_results.analyze_results.
results = analyze_ridge_results.analyze_results(result, 0.30)
In [ ]:
# List which entries the analysis returned.
results.keys()
In [ ]:
# Show the 'call_fracs' entry of the analysis output (its exact definition
# lives in analyze_ridge_results).
results['call_fracs']
In [ ]:
# Show the 'loss_01' entry of the analysis output -- presumably the 0/1
# classification loss; verify in analyze_ridge_results.
results['loss_01']
In [ ]:
# Sanity check: shape of the stored labels and their first ten rows, densified
# so the actual values are visible.
print(result.y.shape)
result.y.toarray()[0:10, ]
In [ ]:
# Sanity check: shape of the predictions and their first ten entries.
print(result.y_preds.shape)
result.y_preds[0:10]
In [ ]:
# Same label check again, but without .toarray(): the last line displays the
# sliced object itself rather than a dense array of values.
print(result.y.shape)
result.y[0:10,]
In [ ]:
# Per-sample residuals (label minus prediction) as a flat 1-D array.
# result.y is a sparse column (it exposes .toarray()), while result.y_preds is
# used directly as a DataFrame column elsewhere in this notebook and so is
# presumably 1-D. Subtracting a length-n vector from an (n, 1) sparse column
# either raises or broadcasts to an n-by-n dense result depending on the SciPy
# version, so both sides are reduced to 1-D before subtracting.
diff = result.y.toarray()[:, 0] - np.ravel(result.y_preds)
In [ ]:
# Squared-error loss of the fit, as computed by Ridge.calc_square_loss
# (defined in ridge_regression).
result.calc_square_loss()
In [ ]:
# Display the predictions. NOTE(review): this echoes the whole array; a slice
# such as result.y_preds[:10] keeps the notebook output small.
result.y_preds
In [ ]: