In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
import env
from ml_mnist.knn import KNNClassifier
from ml_mnist.gp import GPClassifier
from ml_mnist.logreg import LogisticRegression
from ml_mnist.nn import NNClassifier, RBM
from ml_mnist.nn.layers import FullyConnected, Activation
from ml_mnist.nn.activations import leaky_relu
from ml_mnist.decomposition import PCA
from ml_mnist.preprocessing import StandardScaler
from ml_mnist.feature_selection import VarianceThreshold
from ml_mnist.model_selection import TrainTestSplitter, GridSearchCV
from ml_mnist.augmentation import RandomAugmentator
from ml_mnist.metrics import (accuracy_score,
zero_one_loss,
confusion_matrix,
plot_confusion_matrix)
from ml_mnist.utils import (one_hot, unhot,
Stopwatch, RNG,
plot_greyscale_image, plot_rbm_filters)
from ml_mnist.utils.dataset import load_mnist
from ml_mnist.utils.read_write import load_model
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
X, y = load_mnist(mode='train', path='data/')
X.shape
Out[2]:
In [3]:
plot_greyscale_image(X[0], title="Label is {0}".format(y[0]));
In [4]:
plot_greyscale_image(X[42], title="Label is {0}".format(y[42]));
In [ ]:
def load_small(n_samples=5000):
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255.
X_scaled = VarianceThreshold(0.1).fit_transform(X_scaled)
X_scaled = StandardScaler(copy=False).fit_transform(X_scaled)
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
indices, _ = tts.split(y, train_ratio=n_samples/60000., stratify=True)
return X_scaled[indices], y[indices] # 5000 -> 4994 training samples
In [6]:
X_scaled = X / 255.
print X_scaled.min(), X_scaled.max()
print X_scaled.shape
In [7]:
sns.heatmap(X_scaled[100:124, 100:124]); # lots of zeros ofc
In [8]:
X_scaled = VarianceThreshold(0.1).fit_transform(X_scaled)
print X_scaled.min(), X_scaled.max()
print X_scaled.shape
In [9]:
X_scaled = StandardScaler(copy=False).fit_transform(X_scaled)
print X_scaled.min(), X_scaled.max()
print X_scaled.shape
In [10]:
sns.heatmap(X_scaled[100:124, 100:124], cmap='RdYlGn'); # more interesting
In [11]:
knn = KNNClassifier(algorithm='brute')
knn
Out[11]:
In [12]:
with Stopwatch(verbose=True) as s:
knn.fit(X_scaled[:1000], y[:1000])
In [13]:
with Stopwatch(True) as s:
y_pred = knn.predict(X_scaled[1000:1100])
print zero_one_loss(y_pred, y[1000:1100])
In [14]:
knn2 = KNNClassifier(algorithm='kd_tree', leaf_size=10)
knn2
Out[14]:
In [15]:
with Stopwatch(True) as s:
knn2.fit(X_scaled[:1000], y[:1000])
In [16]:
with Stopwatch(True) as s:
y_pred = knn2.predict(X_scaled[1000:1100])
print zero_one_loss(y_pred, y[1000:1100])
This class is used for convenient hyper-parameter grid search for simple models. Its design, like many others here, is inspired by sklearn's, yet it has a few extensions, such as model saving (supported for all models here) and the possibility to specify the order in which parameters are explored.
One more feature is the refit parameter, which controls the order in which parameter combinations are evaluated.
If set to True, then for each combination of parameters the model is refit on each new train/test split, so the mean accuracy score for a given set of parameters is obtained as soon as possible. This makes sense for models with an explicit training procedure (typically parametric ones).
If set to False, then for each split the model is fit only once, and afterwards that fitted model is evaluated on all parameter combinations. This yields results significantly faster for models (typically non-parametric ones) such as k-NN in particular.
Below is a small demo of the output for refit=True:
In [ ]:
param_grid = ({'weights': ['uniform', 'distance'], 'k': [2, 3]}, {'p': [1., np.inf], 'k': [2]})
grid_cv1 = GridSearchCV(model=KNNClassifier(algorithm='kd_tree', leaf_size=1), param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337), n_splits=4,
refit=True, save_models=False, verbose=True)
grid_cv1.fit(X_scaled[:1000], y[:1000]); # rebuilding tree on each iteration
# Training KNNClassifier on 1000 samples x 444 features.
# 4-fold CV for each of 6 params combinations == 24 fits ...
# iter: 1/24 +--- elapsed: 1.026 sec ...
# iter: 2/24 ++-- elapsed: 2.022 sec ...
# iter: 3/24 +++- elapsed: 3.010 sec ...
# iter: 4/24 ++++ elapsed: 4.012 sec - mean acc.: 0.7940 +/- 2 * 0.038
# iter: 5/24 +--- elapsed: 5.017 sec - best acc.: 0.7940 at {'k': 2, 'weights': 'uniform'}
# iter: 6/24 ++-- elapsed: 6.017 sec - best acc.: 0.7940 at {'k': 2, 'weights': 'uniform'}
# iter: 7/24 +++- elapsed: 7.042 sec - best acc.: 0.7940 at {'k': 2, 'weights': 'uniform'}
# iter: 8/24 ++++ elapsed: 8.054 sec - mean acc.: 0.8070 +/- 2 * 0.029
# iter: 9/24 +--- elapsed: 9.093 sec - best acc.: 0.8070 at {'k': 2, 'weights': 'distance'}
# iter: 10/24 ++-- elapsed: 10.105 sec - best acc.: 0.8070 at {'k': 2, 'weights': 'distance'}
# iter: 11/24 +++- elapsed: 11.138 sec - best acc.: 0.8070 at {'k': 2, 'weights': 'distance'}
# iter: 12/24 ++++ elapsed: 12.157 sec - mean acc.: 0.8209 +/- 2 * 0.024
# iter: 13/24 +--- elapsed: 13.198 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 14/24 ++-- elapsed: 14.308 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 15/24 +++- elapsed: 15.596 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 16/24 ++++ elapsed: 16.607 sec - mean acc.: 0.7811 +/- 2 * 0.029
# iter: 17/24 +--- elapsed: 17.706 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 18/24 ++-- elapsed: 18.770 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 19/24 +++- elapsed: 19.840 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 20/24 ++++ elapsed: 20.889 sec - mean acc.: 0.8140 +/- 2 * 0.031
# iter: 21/24 +--- elapsed: 21.866 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 22/24 ++-- elapsed: 22.843 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 23/24 +++- elapsed: 23.811 sec - best acc.: 0.8209 at {'k': 3, 'weights': 'uniform'}
# iter: 24/24 ++++ elapsed: 24.766 sec - mean acc.: 0.4880 +/- 2 * 0.018
and for refit=False (the difference is not big here because there are many features and only a few parameter combinations):
In [ ]:
grid_cv2 = GridSearchCV(model=KNNClassifier(algorithm='kd_tree', leaf_size=1), param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337), n_splits=4,
refit=False, save_models=False, verbose=True)
grid_cv2.fit(X_scaled[:1000], y[:1000]); # building tree only on each 6-th iteration
# Training KNNClassifier on 1000 samples x 444 features.
# 4-fold CV for each of 6 params combinations == 24 fits ...
# iter: 1/24 +--- elapsed: 1.019 sec - best acc.: 0.8110 [1/4 splits] at {'k': 2, 'weights': 'uniform'}
# iter: 2/24 +--- elapsed: 1.834 sec - best acc.: 0.8228 [1/4 splits] at {'k': 2, 'weights': 'distance'}
# iter: 3/24 +--- elapsed: 2.645 sec - best acc.: 0.8386 [1/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 4/24 +--- elapsed: 3.448 sec - best acc.: 0.8386 [1/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 5/24 +--- elapsed: 4.277 sec - best acc.: 0.8386 [1/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 6/24 +--- elapsed: 5.058 sec - best acc.: 0.8386 [1/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 7/24 ++-- elapsed: 6.073 sec - best acc.: 0.8055 [2/4 splits] at {'k': 2, 'weights': 'uniform'}
# iter: 8/24 ++-- elapsed: 6.878 sec - best acc.: 0.8174 [2/4 splits] at {'k': 2, 'weights': 'distance'}
# iter: 9/24 ++-- elapsed: 7.672 sec - best acc.: 0.8353 [2/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 10/24 ++-- elapsed: 8.475 sec - best acc.: 0.8353 [2/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 11/24 ++-- elapsed: 9.336 sec - best acc.: 0.8353 [2/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 12/24 ++-- elapsed: 10.125 sec - best acc.: 0.8353 [2/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 13/24 +++- elapsed: 11.311 sec - best acc.: 0.7806 [3/4 splits] at {'k': 2, 'weights': 'uniform'}
# iter: 14/24 +++- elapsed: 12.127 sec - best acc.: 0.7980 [3/4 splits] at {'k': 2, 'weights': 'distance'}
# iter: 15/24 +++- elapsed: 12.918 sec - best acc.: 0.8166 [3/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 16/24 +++- elapsed: 13.722 sec - best acc.: 0.8166 [3/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 17/24 +++- elapsed: 14.576 sec - best acc.: 0.8166 [3/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 18/24 +++- elapsed: 15.538 sec - best acc.: 0.8166 [3/4 splits] at {'k': 3, 'weights': 'uniform'}
# iter: 19/24 ++++ elapsed: 16.519 sec - best acc.: 0.7940 +/- 2 * 0.038 at {'k': 2, 'weights': 'uniform'}
# iter: 20/24 ++++ elapsed: 17.322 sec - best acc.: 0.8070 +/- 2 * 0.029 at {'k': 2, 'weights': 'distance'}
# iter: 21/24 ++++ elapsed: 18.106 sec - best acc.: 0.8209 +/- 2 * 0.024 at {'k': 3, 'weights': 'uniform'}
# iter: 22/24 ++++ elapsed: 19.095 sec - best acc.: 0.8209 +/- 2 * 0.024 at {'k': 3, 'weights': 'uniform'}
# iter: 23/24 ++++ elapsed: 19.933 sec - best acc.: 0.8209 +/- 2 * 0.024 at {'k': 3, 'weights': 'uniform'}
# iter: 24/24 ++++ elapsed: 20.688 sec - best acc.: 0.8209 +/- 2 * 0.024 at {'k': 3, 'weights': 'uniform'}
The best model, as well as the other "best" attributes, is available:
In [ ]:
grid_cv2.best_model_
Finally, all results can be converted to a pandas.DataFrame and stored to Excel (or any other format). For more details, see the docstrings in the code.
In [ ]:
df = grid_cv2.to_df()
df.to_excel('test.xlsx')
df
In [ ]:
param_grid = {'weights': ['uniform', 'distance'],
'k': range(2, 31),
'p': [1., 2., 3., np.inf]}
param_order = ['k', 'weights', 'p']
grid_cv_knn_1 = GridSearchCV(model=KNNClassifier(algorithm='kd_tree', leaf_size=10),
param_grid=param_grid,
param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=5,
refit=False,
save_models=True,
dirpath='tmp/',
save_params=dict(
params_mask=dict(kd_tree_=False), # do not save tree
json_params=dict(indent=4)),
verbose=True)
In [ ]:
[params for params in grid_cv_knn_1.gen_params()][:10]
In [ ]:
grid_cv_knn_1.number_of_combinations()
In [ ]:
grid_cv_knn_1.fit(X_knn_1, y_knn_1);
# Training KNNClassifier on 4994 samples x 444 features.
# 5-fold CV for each of 232 params combinations == 1160 fits ...
# iter: 1/1160 +---- elapsed: 34.320 sec - best acc.: 0.9084 [1/5 splits] at {'p': 1.0, 'k': 2, 'weights': 'uniform'}
# iter: 2/1160 +---- elapsed: 49.252 sec - best acc.: 0.9203 [1/5 splits] at {'p': 1.0, 'k': 2, 'weights': 'distance'}
# iter: 3/1160 +---- elapsed: 63.681 sec - best acc.: 0.9203 [1/5 splits] at {'p': 1.0, 'k': 2, 'weights': 'distance'}
# ...
# iter: 925/1160 ++++- elapsed: 20728.7 sec - best acc.: 0.9217 [4/5 splits] at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 926/1160 ++++- elapsed: 20780.0 sec - best acc.: 0.9217 [4/5 splits] at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 927/1160 ++++- elapsed: 20794.5 sec - best acc.: 0.9217 [4/5 splits] at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 928/1160 ++++- elapsed: 20809.0 sec - best acc.: 0.9217 [4/5 splits] at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 929/1160 +++++ elapsed: 20843.3 sec - best acc.: 0.9091 +/- 2 * 0.007 at {'p': 1.0, 'k': 2, 'weights': 'uniform'}
# iter: 930/1160 +++++ elapsed: 20858.1 sec - best acc.: 0.9195 +/- 2 * 0.003 at {'p': 1.0, 'k': 2, 'weights': 'distance'}
# iter: 931/1160 +++++ elapsed: 20872.5 sec - best acc.: 0.9195 +/- 2 * 0.003 at {'p': 1.0, 'k': 2, 'weights': 'distance'}
# ...
# iter: 1158/1160 +++++ elapsed: 25924.2 sec - best acc.: 0.9209 +/- 2 * 0.004 at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 1159/1160 +++++ elapsed: 25939.9 sec - best acc.: 0.9209 +/- 2 * 0.004 at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# iter: 1160/1160 +++++ elapsed: 25955.6 sec - best acc.: 0.9209 +/- 2 * 0.004 at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
In [ ]:
df = grid_cv_knn_1.to_df()
df.to_excel('knn_1_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(10).to_excel('knn_1_best.xlsx')
Unfortunately, kd-trees in scipy are only supported for l_p metrics, not for custom distance functions, so kernel k-NN predictions must be computed in brute-force mode.
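For reference, the kernel-induced distance such a brute-force k-NN works with is d(x, y) = sqrt(K(x, x) - 2 K(x, y) + K(y, y)), i.e. the Euclidean distance in the kernel's feature space. A minimal numpy sketch with an RBF kernel (illustrative only, not the KNNClassifier internals):
import numpy as np

def rbf_kernel(x, y, gamma=1e-2):
    return np.exp(-gamma * np.sum((x - y) ** 2))

def kernel_distance(x, y, kernel=rbf_kernel):
    # ||phi(x) - phi(y)|| expressed through kernel evaluations only
    return np.sqrt(kernel(x, x) - 2. * kernel(x, y) + kernel(y, y))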
In [ ]:
param_grid_0 = [{'weights': ['uniform'], 'k': range(2, 12 + 1)},
{'weights': ['distance'], 'k': (2, 3, 4)}]
param_grid = []
for d in param_grid_0:
d1 = d.copy()
d1.update({'kernel': ['rbf'],
'kernel_params': [dict(gamma=gamma) for gamma in np.logspace(-7, 2, 10)]})
param_grid.append(d1)
d2 = d.copy()
d2.update({'kernel': ['sigmoid'],
'kernel_params': [dict(gamma=gamma) for gamma in (1e-4, 1e-2, 1.)]})
param_grid.append(d2)
d3 = d.copy()
d3.update({'kernel': ['poly'],
'kernel_params': [dict(degree=degree) for degree in (2, 3, 4)]})
param_grid.append(d3)
param_order = [['kernel_params', 'k']] * len(param_grid)
grid_cv_knn_2 = GridSearchCV(model=KNNClassifier(algorithm='brute'),
param_grid=param_grid,
param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=3,
refit=True,
save_models=True,
dirpath='tmp/',
save_params=dict(json_params=dict(indent=4)),
verbose=True)
In [ ]:
[params for params in grid_cv_knn_2.gen_params()][:3]
In [ ]:
grid_cv_knn_2.number_of_combinations()
In [ ]:
X_knn_2, y_knn_2 = load_small(2500)
grid_cv_knn_2.fit(X_knn_2, y_knn_2)
# Training KNNClassifier on 2494 samples x 444 features.
# 3-fold CV for each of 224 params combinations == 672 fits ...
# iter: 1/672 +-- elapsed: 99.099 sec ...
# iter: 2/672 ++- elapsed: 197.839 sec ...
# iter: 3/672 +++ elapsed: 294.787 sec - mean acc.: 0.8693 +/- 2 * 0.009
# iter: 4/672 +-- elapsed: 390.949 sec - best acc.: 0.8693 at {'kernel_params': {'gamma': 9.9999999999999995e-08}, 'k': 2, 'weights': 'uniform', 'kernel': 'rbf'}
# iter: 5/672 ++- elapsed: 487.090 sec - best acc.: 0.8693 at {'kernel_params': {'gamma': 9.9999999999999995e-08}, 'k': 2, 'weights': 'uniform', 'kernel': 'rbf'}
# ...
# iter: 668/672 ++- elapsed: 56102.7 sec - best acc.: 0.8889 at {'kernel_params': {'gamma': 9.9999999999999995e-08}, 'k': 2, 'weights': 'distance', 'kernel': 'rbf'}
# iter: 669/672 +++ elapsed: 56140.9 sec - mean acc.: 0.3946 +/- 2 * 0.015
# iter: 670/672 +-- elapsed: 56179.3 sec - best acc.: 0.8889 at {'kernel_params': {'gamma': 9.9999999999999995e-08}, 'k': 2, 'weights': 'distance', 'kernel': 'rbf'}
# iter: 671/672 ++- elapsed: 56217.2 sec - best acc.: 0.8889 at {'kernel_params': {'gamma': 9.9999999999999995e-08}, 'k': 2, 'weights': 'distance', 'kernel': 'rbf'}
# iter: 672/672 +++ elapsed: 56253.5 sec - mean acc.: 0.3797 +/- 2 * 0.020
In [ ]:
df = grid_cv_knn_2.to_df()
df.to_excel('knn_2_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(25).to_excel('knn_2_best.xlsx')
where $S$ is the diagonal matrix of singular values of $X$, and even more interesting: $$ \mathbf{x}_{PCA}=W^T(\mathbf{x}-\pmb{\mu})= \frac{1}{\sqrt{n}}S \left(\sqrt{n}S^{-1}W^T\right)(\mathbf{x}-\pmb{\mu})= \frac{1}{\sqrt{n}}S\cdot\mathbf{x}_{PCA\;whitened}, $$
therefore computing distances between vectors after applying PCA without whitening is the same as applying PCA whitening and then computing distances between vectors reweighted by the respective singular values! (I wanted to try this as a separate approach, but it turns out to be the same as approach #3.)
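A small numpy check of that identity, following the formula above (synthetic data, illustrative only; the whitening convention with the sqrt(n) factor matches the one used here):
import numpy as np

rng = np.random.RandomState(0)
A = rng.rand(200, 10)                      # synthetic data, rows are samples
mu = A.mean(axis=0)
n = A.shape[0]

# SVD of the centered data: A - mu = U * diag(s) * W^T
U, s, Wt = np.linalg.svd(A - mu, full_matrices=False)
W = Wt.T

x, v = A[0], A[1]
x_pca, v_pca = W.T.dot(x - mu), W.T.dot(v - mu)                  # plain PCA projection
x_wht, v_wht = np.sqrt(n) / s * x_pca, np.sqrt(n) / s * v_pca    # PCA whitening

# distance between plain PCA projections equals distance between whitened
# projections reweighted by the singular values (and the 1/sqrt(n) factor)
d_pca = np.linalg.norm(x_pca - v_pca)
d_wht_weighted = np.linalg.norm(s / np.sqrt(n) * (x_wht - v_wht))
print np.allclose(d_pca, d_wht_weighted)   # True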
In [ ]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
with Stopwatch(verbose=True) as s:
pca = PCA().fit(X)
pca.save('models/pca_full.json') # ~13 Mb
In [ ]:
pca_full = load_model('models/pca_full.json'); pca_full
In [ ]:
sum(pca_full.explained_variance_ratio_[:154]) # <- to explain 95% of the variance we need 154 components
In [ ]:
def load_small2(n_samples):
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255. # only divide by 255
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
indices, _ = tts.split(y, train_ratio=n_samples/60000., stratify=True)
return X_scaled[indices], y[indices]
X_sm, y_sm = load_small2(1000) # approx
In [ ]:
param_grid = ({'weights': ['distance'],
'k': [2, 3, 4],
'p': [1, 2]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6, 9, 12, 15],
'p': [1, 2]
})
grid_search_params = dict(model=KNNClassifier(algorithm='kd_tree'),
param_grid=param_grid,
# param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=3,
refit=False,
# save_models=True,
# dirpath='tmp/',
# save_params=dict(json_params=dict(indent=4)),
verbose=True)
for n_components in xrange(5, 151, 5):
print "[PCA n_components = {0}]\n\n".format(n_components)
X_current = pca_full.set_params(n_components=n_components, whiten=False).transform(X_sm)
grid_cv_knn_pca_1 = GridSearchCV(**grid_search_params).fit(X_current, y_sm)
df = grid_cv_knn_pca_1\
.to_df()\
.sort_values(by='mean_score', ascending=False)\
.to_excel('cv_results/knn_3_pca_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv_knn_pca_1.best_score_))
print "\n\n"
# [PCA n_components = 5]
# Training KNNClassifier on 4994 samples x 5 features.
# 3-fold CV for each of 20 params combinations == 60 fits ...
# iter: 1/60 +-- elapsed: 0.673 sec - best acc.: 0.6936 [1/3 splits] at {'p': 1, 'k': 2, 'weights': 'distance'}
# iter: 2/60 +-- elapsed: 1.340 sec - best acc.: 0.6990 [1/3 splits] at {'p': 2, 'k': 2, 'weights': 'distance'}
# iter: 3/60 +-- elapsed: 1.998 sec - best acc.: 0.6990 [1/3 splits] at {'p': 2, 'k': 2, 'weights': 'distance'}
# ...
# iter: 58/60 +++ elapsed: 41.769 sec - best acc.: 0.7369 +/- 2 * 0.003 at {'p': 2, 'k': 12, 'weights': 'uniform'}
# iter: 59/60 +++ elapsed: 42.429 sec - best acc.: 0.7369 +/- 2 * 0.003 at {'p': 1, 'k': 15, 'weights': 'uniform'}
# iter: 60/60 +++ elapsed: 43.073 sec - best acc.: 0.7369 +/- 2 * 0.003 at {'p': 1, 'k': 15, 'weights': 'uniform'}
# ...
# ...
# ...
# iter: 58/60 +++ elapsed: 133.416 sec - best acc.: 0.9381 +/- 2 * 0.004 at {'p': 2, 'k': 2, 'weights': 'distance'}
# iter: 59/60 +++ elapsed: 136.472 sec - best acc.: 0.9381 +/- 2 * 0.004 at {'p': 2, 'k': 2, 'weights': 'distance'}
# iter: 60/60 +++ elapsed: 138.300 sec - best acc.: 0.9381 +/- 2 * 0.004 at {'p': 2, 'k': 2, 'weights': 'distance'}
# [PCA n_components = 115]
# Training KNNClassifier on 4994 samples x 115 features.
# 3-fold CV for each of 20 params combinations == 60 fits ...
# iter: 1/60 +-- elapsed: 3.008 sec - best acc.: 0.9263 [1/3 splits] at {'p': 1, 'k': 2, 'weights': 'distance'}
# iter: 2/60 +-- elapsed: 4.943 sec - best acc.: 0.9394 [1/3 splits] at {'p': 2, 'k': 2, 'weights': 'distance'}
In [ ]:
param_grid = ({'weights': ['distance'],
'k': [2, 3, 4],
'p': [1, 2]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6, 9, 12, 15],
'p': [1, 2]
})
grid_search_params = dict(model=KNNClassifier(algorithm='kd_tree'),
param_grid=param_grid,
# param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=3,
refit=False,
# save_models=True,
# dirpath='tmp/',
# save_params=dict(json_params=dict(indent=4)),
verbose=False)
for n_components in xrange(10, 151, 5):
print "[PCA n_components = {0}]".format(n_components)
X_current = pca_full.set_params(n_components=n_components, whiten=True).transform(X_sm)
grid_cv_knn_pca_1 = GridSearchCV(**grid_search_params).fit(X_current, y_sm)
df = grid_cv_knn_pca_1\
.to_df()\
.sort_values(by='mean_score', ascending=False)\
.to_excel('cv_results/knn_3_pca_whiten_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv_knn_pca_1.best_score_))
# [PCA n_components = 10]
# [PCA n_components = 15]
# [PCA n_components = 20]
# [PCA n_components = 25]
# [PCA n_components = 30]
# [PCA n_components = 35]
# [PCA n_components = 40]
# [PCA n_components = 45]
# [PCA n_components = 50]
# [PCA n_components = 55]
# [PCA n_components = 60]
# [PCA n_components = 65]
# [PCA n_components = 70]
# [PCA n_components = 75]
In [ ]:
param_grid = ({'weights': ['distance'],
'k': [2, 3, 4],
'kernel': ['rbf'],
'kernel_params': [dict(gamma=x) for x in [1e-1, 1e-2, 1e-4, 1e-6]]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6, 9, 12],
'kernel': ['rbf'],
'kernel_params': [dict(gamma=x) for x in [1e-1, 1e-2, 1e-4, 1e-6]]
},
{'weights': ['distance'],
'k': [2, 3, 4],
'kernel': ['poly'],
'kernel_params': [dict(degree=x) for x in [2, 3, 4]]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6],
'kernel': ['poly'],
'kernel_params': [dict(degree=x) for x in [2, 3, 4]]
})
grid_search_params = dict(model=KNNClassifier(algorithm='brute'),
param_grid=param_grid,
# param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=3,
refit=True,
# save_models=True,
# dirpath='tmp/',
# save_params=dict(json_params=dict(indent=4)),
verbose=True)
for n_components in xrange(5, 151, 5):
print "[PCA n_components = {0}]\n\n".format(n_components)
X_current = pca_full.set_params(n_components=n_components, whiten=False).transform(X_sm)
grid_cv_knn_pca_2 = GridSearchCV(**grid_search_params).fit(X_current, y_sm)
df = grid_cv_knn_pca_2\
.to_df()\
.sort_values(by='mean_score', ascending=False)\
.to_excel('cv_results/knn_4_pca_krnl_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv_knn_pca_2.best_score_))
print "\n"
# [PCA n_components = 5]
# Training KNNClassifier on 996 samples x 5 features.
# 3-fold CV for each of 57 params combinations == 171 fits ...
# iter: 1/171 +-- elapsed: 18.874 sec ...
# iter: 2/171 ++- elapsed: 39.243 sec ...
# iter: 3/171 +++ elapsed: 58.217 sec - mean acc.: 0.6879 +/- 2 * 0.029
# ...
# iter: 169/171 +-- elapsed: 2299.67 sec - best acc.: 0.7149 at {'kernel': 'rbf', 'k': 6, 'weights': 'uniform', 'kernel_params': {'gamma': 0.1}}
# iter: 170/171 ++- elapsed: 2306.23 sec - best acc.: 0.7149 at {'kernel': 'rbf', 'k': 6, 'weights': 'uniform', 'kernel_params': {'gamma': 0.1}}
# iter: 171/171 +++ elapsed: 2313.28 sec - mean acc.: 0.5814 +/- 2 * 0.011
# ...
# ...
# ...
# iter: 169/171 +-- elapsed: 1869.40 sec - best acc.: 0.8704 at {'kernel': 'rbf', 'k': 2, 'weights': 'distance', 'kernel_params': {'gamma': 0.1}}
# iter: 170/171 ++- elapsed: 1876.34 sec - best acc.: 0.8704 at {'kernel': 'rbf', 'k': 2, 'weights': 'distance', 'kernel_params': {'gamma': 0.1}}
# iter: 171/171 +++ elapsed: 1882.34 sec - mean acc.: 0.3715 +/- 2 * 0.043
# [PCA n_components = 95]
# Training KNNClassifier on 996 samples x 95 features.
# 3-fold CV for each of 57 params combinations == 171 fits ...
# iter: 1/171 +-- elapsed: 15.785 sec ...
# iter: 2/171 ++- elapsed: 31.366 sec ...
# iter: 3/171 +++ elapsed: 46.182 sec - mean acc.: 0.8674 +/- 2 * 0.024
# iter: 4/171 +-- elapsed: 60.642 sec - best acc.: 0.8674 at {'kernel': 'rbf', 'k': 2, 'weights': 'distance', 'kernel_params': {'gamma': 0.1}}
In [ ]:
param_grid = ({'weights': ['distance'],
'k': [2, 3, 4],
'kernel': ['rbf'],
'kernel_params': [dict(gamma=x) for x in [1e-1, 1e-2, 1e-4, 1e-6]]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6, 9, 12],
'kernel': ['rbf'],
'kernel_params': [dict(gamma=x) for x in [1e-1, 1e-2, 1e-4, 1e-6]]
},
{'weights': ['distance'],
'k': [2, 3, 4],
'kernel': ['poly'],
'kernel_params': [dict(degree=x) for x in [2, 3, 4]]
},
{'weights': ['uniform'],
'k': [2, 3, 4, 6],
'kernel': ['poly'],
'kernel_params': [dict(degree=x) for x in [2, 3, 4]]
})
grid_search_params = dict(model=KNNClassifier(algorithm='brute'),
param_grid=param_grid,
# param_order=param_order,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=3,
refit=True,
# save_models=True,
# dirpath='tmp/',
# save_params=dict(json_params=dict(indent=4)),
verbose=True)
for n_components in xrange(5, 151, 5):
print "[PCA n_components = {0}]\n\n".format(n_components)
X_current = pca_full.set_params(n_components=n_components, whiten=True).transform(X_sm)
grid_cv_knn_pca_2 = GridSearchCV(**grid_search_params).fit(X_current, y_sm)
df = grid_cv_knn_pca_2\
.to_df()\
.sort_values(by='mean_score', ascending=False)\
.to_excel('cv_results/knn_4_pca_krnl_whiten_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv_knn_pca_2.best_score_))
print "\n"
# [PCA n_components = 5]
# Training KNNClassifier on 996 samples x 5 features.
# 3-fold CV for each of 57 params combinations == 171 fits ...
# iter: 1/171 +-- elapsed: 16.284 sec ...
# iter: 2/171 ++- elapsed: 32.904 sec ...
# iter: 3/171 +++ elapsed: 54.273 sec - mean acc.: 0.6939 +/- 2 * 0.018
# ...
# iter: 169/171 +-- elapsed: 2319.17 sec - best acc.: 0.7199 at {'kernel': 'rbf', 'k': 9, 'weights': 'uniform', 'kernel_params': {'gamma': 0.1}}
# iter: 170/171 ++- elapsed: 2325.68 sec - best acc.: 0.7199 at {'kernel': 'rbf', 'k': 9, 'weights': 'uniform', 'kernel_params': {'gamma': 0.1}}
# iter: 171/171 +++ elapsed: 2331.78 sec - mean acc.: 0.5984 +/- 2 * 0.013
# ...
# ...
# ...
# iter: 169/171 +-- elapsed: 2504.95 sec - best acc.: 0.7972 at {'kernel': 'rbf', 'k': 2, 'weights': 'distance', 'kernel_params': {'gamma': 0.1}}
# iter: 170/171 ++- elapsed: 2511.18 sec - best acc.: 0.7972 at {'kernel': 'rbf', 'k': 2, 'weights': 'distance', 'kernel_params': {'gamma': 0.1}}
# iter: 171/171 +++ elapsed: 2517.55 sec - mean acc.: 0.1124 +/- 2 * 0.001
# [PCA n_components = 85]
# Training KNNClassifier on 996 samples x 85 features.
# 3-fold CV for each of 57 params combinations == 171 fits ...
# iter: 1/171 +-- elapsed: 15.737 sec ...
# iter: 2/171 ++- elapsed: 32.295 sec ...
# iter: 3/171 +++ elapsed: 48.847 sec - mean acc.: 0.7892 +/- 2 * 0.035
In [18]:
X, y = load_mnist(mode='train', path='data/')
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)\
.add('RandomRotate', angle=(-10., 15.))\
.add('Dropout', p=(0., 0.1))\
.add('RandomGaussian', sigma=(0., 0.5))\
.add('RandomShift', x_shift=(-2, 2), y_shift=(-2, 2))
for z in aug.transform(X[:2]/255., 3):
plot_greyscale_image(z)
In [ ]:
pca_full = load_model('models/pca_full.json')
def load_big2():
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255. # only divide by 255
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=50005./60000., stratify=True)
return X_scaled[train], y[train], X_scaled[test], y[test] # 49999 train, 10001 val
X_train, y_train, X_test, y_test = load_big2()
X_train = X_train[:5000]
y_train = y_train[:5000]
X_test = X_test[:1000]
y_test = y_test[:1000]
N = 3
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)
aug.add('RandomRotate', angle=(-7., 10.))
aug.add('RandomGaussian', sigma=(0., 0.5))
aug.add('RandomShift', x_shift=(-1, 1), y_shift=(-1, 1))
aug.add('Dropout', p=(0.8, 1.0))
X_train_aug = aug.transform(X_train, N)
y_train_aug = np.repeat(y_train, N + 1)
print X_train_aug.shape
pca_full.set_params(n_components=35, whiten=False)
X_train_aug = pca_full.transform(X_train_aug)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=2, p=2, weights='distance')
with Stopwatch(verbose=True) as s: knn.fit(X_train_aug, y_train_aug)
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test)
print accuracy_score(y_test, y_pred)
In [ ]:
def load_big():
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255.
X_scaled = VarianceThreshold(0.1).fit_transform(X_scaled)
X_scaled = StandardScaler(copy=False).fit_transform(X_scaled)
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=50005./60000., stratify=True)
return X_scaled[train], y[train], X_scaled[test], y[test] # 49999 train, 10001 val
In [ ]:
X_train, y_train, X_test, y_test = load_big()
In [ ]:
knns_best = []
# from approach 1
knns_best.append(KNNClassifier(algorithm='brute', k=3, p=1., weights='uniform'))
knns_best.append(KNNClassifier(algorithm='brute', k=2, p=1., weights='distance'))
# from approach 2
knns_best.append(KNNClassifier(algorithm='brute', k=2, weights='distance', kernel='rbf', kernel_params=dict(gamma=1e-5)))
knns_best.append(KNNClassifier(algorithm='brute', k=3, weights='uniform', kernel='rbf', kernel_params=dict(gamma=1e-5)))
In [ ]:
# -------------------------------------------
# def f(x):
# return knn._predict_x(x)
# from joblib import Parallel, delayed
# p = Parallel(n_jobs=1, max_nbytes=None)
# print p(delayed(f)(x) for x in X_test[:2]) # <-- NOT WORKING, CANNOT PICKLE INSTANCE METHODS
# ----------------------------------------------
import pathos.multiprocessing as mp
pool = mp.ProcessingPool(4)
for knn in knns_best:
knn.fit(X_train, y_train)
y_pred = pool.map(knn._predict_x, X_test) # knn.predict(X_test) in parallel
print accuracy_score(y_test, y_pred)
# 0.96650...
# 0.96400...
# 0.96110...
# 0.96150...
In [36]:
pca_full = load_model('models/pca_full.json')
In [37]:
def load_big2():
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255. # only divide by 255
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=50005./60000., stratify=True)
return X_scaled[train], y[train], X_scaled[test], y[test] # 49999 train, 10001 val
X_train, y_train, X_test, y_test = load_big2()
In [21]:
pca_full.set_params(n_components=35)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=3, p=2, weights='uniform')
with Stopwatch(verbose=True) as s: knn.fit(X_train, y_train) # Elapsed time: 0.064 sec
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test) # Elapsed time: 18.823 sec <- FAST!
print accuracy_score(y_test, y_pred)
# 0.9754...
In [23]:
C = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(C);
In [27]:
C = confusion_matrix(y_test, y_pred, normalize='cols')
plot_confusion_matrix(C, fmt=".2f");
In [ ]:
pca_full.set_params(n_components=35)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=2, p=2, weights='distance')
with Stopwatch(verbose=True) as s: knn.fit(X_train, y_train) # Elapsed time: 0.067 sec
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test) # Elapsed time: 17.848 sec
print accuracy_score(y_test, y_pred)
# 0.9751...
In [ ]:
pca_full.set_params(n_components=35)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=2, p=1, weights='distance')
with Stopwatch(verbose=True) as s: knn.fit(X_train, y_train)
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test)
print accuracy_score(y_test, y_pred)
# 0.9747...
In [ ]:
pca_full.set_params(n_components=30)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=3, p=2, weights='uniform')
with Stopwatch(verbose=True) as s: knn.fit(X_train, y_train)
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test)
print accuracy_score(y_test, y_pred)
# 0.9746...
In [ ]:
pca_full.set_params(n_components=35, whiten=True)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='kd_tree', k=3, p=2, weights='uniform')
with Stopwatch(verbose=True) as s: knn.fit(X_train, y_train)
with Stopwatch(verbose=True) as t: y_pred = knn.predict(X_test)
print accuracy_score(y_test, y_pred)
# 0.9723...
In [ ]:
pca_full.set_params(n_components=35, whiten=False)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='brute', k=3, weights='uniform', kernel='rbf', kernel_params=dict(gamma=1e-4))
knn.fit(X_train, y_train)
y_pred = []
for (i, x) in enumerate(X_test):
y_pred.append(knn._predict_x(x))
if (i + 1) % 10 == 0:
print "computed {0}/{1} ... accuracy {2:.4f}".format(i + 1, len(X_test), accuracy_score(y_test[:len(y_pred)], y_pred))
print accuracy_score(y_test, y_pred)
# ...
# computed 2960/10001 ... accuracy 0.9743
# computed 2970/10001 ... accuracy 0.9744
# ...
# computed 3030/10001 ... accuracy 0.9743
# computed 3040/10001 ... accuracy 0.9743
In [ ]:
pca_full.set_params(n_components=20, whiten=True)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test)
knn = KNNClassifier(algorithm='brute', k=3, weights='uniform', kernel='rbf', kernel_params=dict(gamma=1e-4))
knn.fit(X_train, y_train)
y_pred = []
for (i, x) in enumerate(X_test):
y_pred.append(knn._predict_x(x))
if (i + 1) % 10 == 0:
print "computed {0}/{1} ... accuracy {2:.4f}".format(i + 1, len(X_test), accuracy_score(y_test[:len(y_pred)], y_pred))
print accuracy_score(y_test, y_pred)
# 0.9655...
In [17]:
pca_full = load_model('models/pca_full.json')
def load_big2(train_ratio=50005./60000.):
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255. # only divide by 255
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=train_ratio, stratify=True)
return X_scaled[train], y[train], X_scaled[test], y[test]
X_train_orig, y_train_orig, X_test_orig, y_test_orig = load_big2(57000./60000.)
In [ ]:
# train_ratio=50005./60000.
pca_full.set_params(n_components=35, whiten=True)
z = pca_full.explained_variance_ratio_[:35]
z /= sum(z)
# for alpha in (1e-6, 1e-4, 1e-2, 0.1, 1., 10.):
# for alpha in np.logspace(0.0, 5.0, num=11):
# for alpha in (5., 7., 8., 9., 11., 12., 14., 16.):
for alpha in np.arange(11.0, 13.0, 0.2):
print "alpha =", alpha
X_train = pca_full.transform(X_train_orig)
X_test = pca_full.transform(X_test_orig)
X_train *= np.exp(alpha * z)
X_test *= np.exp(alpha * z)
# knn = KNNClassifier(algorithm='kd_tree', k=2, p=2, weights='distance')
# knn.fit(X_train, y_train)
# print knn.evaluate(X_test, y_test)
knn = KNNClassifier(algorithm='kd_tree', k=3, p=2, weights='uniform')
knn.fit(X_train, y_train)
print knn.evaluate(X_test, y_test)
# alpha = 1e-06
# 0.971102889711
# 0.972302769723
# 0.971102889711
# alpha = 0.0001
# 0.971102889711
# 0.972302769723
# alpha = 0.01
# 0.971102889711
# 0.972302769723
# alpha = 0.1
# 0.971202879712
# 0.972302769723
# alpha = 1.0
# 0.97200279972
# 0.972802719728
# alpha = 10.0
# 0.973802619738
# 0.97700229977
# ...
# alpha = 5.0
# 0.973402659734
# 0.974802519748
# alpha = 7.0
# 0.97400259974
# 0.975602439756
# alpha = 8.0
# 0.974102589741
# 0.976202379762
# alpha = 9.0
# 0.973802619738
# 0.976302369763
# alpha = 11.0
# 0.97400259974
# 0.977302269773
# alpha = 12.0
# 0.974202579742
# 0.977502249775
# alpha = 14.0
# 0.973402659734
# 0.976602339766
# alpha = 16.0
# 0.972902709729
# 0.976202379762
# alpha = 11.2
# 0.977402259774
# alpha = 11.4
# 0.977602239776
# alpha = 11.6
# [*] 0.977802219778
# alpha = 11.8
# [*] 0.977802219778
# alpha = 12.0
# 0.977502249775
# alpha = 12.2
# 0.977402259774
# alpha = 12.4
In [ ]:
# train_ratio=57000./60000.
pca_full.set_params(n_components=35, whiten=True)
z = pca_full.explained_variance_ratio_[:35]
z /= sum(z)
alpha = 11.6
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)
aug.add('RandomRotate', angle=(-7., 10.))
aug.add('RandomGaussian', sigma=(0., 0.5))
aug.add('RandomShift', x_shift=(-1, 1), y_shift=(-1, 1))
aug.add('Dropout', p=(0., 0.2))
for N in xrange(10): # augment by a factor of (1 + N)
X_train = aug.transform(X_train_orig, N)
y_train = np.repeat(y_train_orig, N + 1)
X_train = pca_full.transform(X_train)
X_test = pca_full.transform(X_test_orig)
X_train *= np.exp(alpha * z)
X_test *= np.exp(alpha * z)
knn = KNNClassifier(algorithm='kd_tree', k=3, p=2, weights='uniform')
knn.fit(X_train, y_train)
print "N = {0}, acc. = {1:.5f}".format(N, knn.evaluate(X_test, y_test_orig))
# N = 0, acc. = 0.97904
# N = 1, acc. = 0.98137
# N = 2, acc. = 0.98137
# N = 3, acc. = 0.98303
# N = 4, acc. = 0.98337
# N = 5, acc. = 0.98370
# N = 6, acc. = 0.98370
# N = 7, acc. = 0.98237
# [*] N = 8, acc. = 0.98536
# N = 9, acc. = 0.98436
In [ ]:
nn = load_model('tmp/16nn.json')
X_train, _ = load_mnist('train', 'data/')
X_train /= 255.
nn.forward_pass(X_train)
np.save('data/train_feats.npy', leaky_relu(nn.layers[13]._last_input))
In [ ]:
X = np.load('data/train_feats.npy')
_, y = load_mnist('train', 'data/')
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=50005./60000., stratify=True) # 49999 : 10001
param_grid = dict(
k=[2, 3, 4, 5],
p=[1., 2., 3.],
weights=['uniform', 'distance']
)
grid_cv = GridSearchCV(None, param_grid=param_grid)
knn = KNNClassifier(algorithm='kd_tree')
knn.fit(X[train], y[train])
for params in grid_cv.gen_params():
knn.reset_params().set_params(**params)
acc = knn.evaluate(X[test], y[test])
print "{0:.4f} at {1}".format(acc, params)
# (Sorted)
# 0.9906 at {'p': 1.0, 'k': 5, 'weights': 'distance'}
# 0.9912 at {'p': 2.0, 'k': 5, 'weights': 'distance'}
# 0.9919 at {'p': 3.0, 'k': 5, 'weights': 'distance'}
# 0.9926 at {'p': 1.0, 'k': 4, 'weights': 'distance'}
# 0.9929 at {'p': 2.0, 'k': 4, 'weights': 'distance'}
# 0.9934 at {'p': 3.0, 'k': 4, 'weights': 'distance'}
# 0.9943 at {'p': 2.0, 'k': 3, 'weights': 'distance'}
# 0.9945 at {'p': 1.0, 'k': 3, 'weights': 'distance'}
# 0.9950 at {'p': 3.0, 'k': 3, 'weights': 'distance'}
# 0.9957 at {'p': 2.0, 'k': 2, 'weights': 'uniform'}
# 0.9958 at {'p': 3.0, 'k': 2, 'weights': 'uniform'}
# 0.9959 at {'p': 1.0, 'k': 2, 'weights': 'uniform'}
# 0.9960 at {'p': 2.0, 'k': 2, 'weights': 'distance'}
# 0.9962 at {'p': 3.0, 'k': 2, 'weights': 'distance'}
# 0.9963 at {'p': 1.0, 'k': 2, 'weights': 'distance'}
# 0.9964 at {'p': 2.0, 'k': 4, 'weights': 'uniform'}
# 0.9965 at {'p': 3.0, 'k': 4, 'weights': 'uniform'}
# 0.9967 at {'p': 1.0, 'k': 4, 'weights': 'uniform'}
# 0.9968 at {'p': 3.0, 'k': 3, 'weights': 'uniform'}
# 0.9969 at {'p': 1.0, 'k': 3, 'weights': 'uniform'}
# 0.9969 at {'p': 1.0, 'k': 5, 'weights': 'uniform'}
# 0.9970 at {'p': 2.0, 'k': 5, 'weights': 'uniform'}
# 0.9970 at {'p': 3.0, 'k': 5, 'weights': 'uniform'}
# [*] 0.9971 at {'p': 2.0, 'k': 3, 'weights': 'uniform'}
In [ ]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=0.85)
y = one_hot(y)
logreg = LogisticRegression(n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=100,
learning_rate=1e-3)
)
logreg.fit(X[train], y[train], X_val=X[test], y_val=y[test])
y_pred = logreg.predict(X[test])
print accuracy_score(y_pred, y[test])
# 0.92755...
In [ ]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=0.85)
y = one_hot(y)
logreg = LogisticRegression(n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=100,
learning_rate=1e-3)
)
logreg.fit(X[train], y[train], X_val=X[test], y_val=y[test])
y_pred = logreg.predict(X[test])
print accuracy_score(y_pred, y[test])
# 0.92766...
In [ ]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=0.85)
y = one_hot(y)
for lr in (5 * 1e-5, 1e-4, 2 * 1e-4, 5 * 1e-4, 1e-3, 2 * 1e-3, 5 * 1e-3, 1e-2):
for L2 in (0., 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1.):
logreg = LogisticRegression(L2=L2,
n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=600,
early_stopping=50,
# verbose=True,
learning_rate=lr,
plot=False)
)
logreg.fit(X[train], y[train], X_val=X[test], y_val=y[test])
acc = logreg.evaluate(X[test], y[test])
print "{0:.4f}, lr = {1}, L2 = {2}".format(acc, lr, L2)
# 0.9051, lr = 1e-05, L2 = 1e-06
# 0.9051, lr = 1e-05, L2 = 1e-05
# 0.9051, lr = 1e-05, L2 = 0.0001
# 0.9051, lr = 1e-05, L2 = 0.001
# 0.9049, lr = 1e-05, L2 = 0.01
# 0.9046, lr = 1e-05, L2 = 0.1
# 0.9009, lr = 1e-05, L2 = 1.0
# 0.9250, lr = 2e-05, L2 = 1e-06
# 0.9250, lr = 2e-05, L2 = 1e-05
# 0.9250, lr = 2e-05, L2 = 0.0001
# 0.9251, lr = 2e-05, L2 = 0.001
# 0.9248, lr = 2e-05, L2 = 0.01
# 0.9248, lr = 2e-05, L2 = 0.1
# 0.9268, lr = 5e-05, L2 = 0.0
# 0.9268, lr = 5e-05, L2 = 1e-06
# 0.9268, lr = 5e-05, L2 = 1e-05
# 0.9267, lr = 5e-05, L2 = 0.0001
# 0.9268, lr = 5e-05, L2 = 0.001
# 0.9270, lr = 5e-05, L2 = 0.01
# 0.9262, lr = 5e-05, L2 = 0.1
# 0.9216, lr = 5e-05, L2 = 1.0
# 0.9264, lr = 0.0001, L2 = 0.0
# 0.9264, lr = 0.0001, L2 = 1e-06
# 0.9266, lr = 0.0001, L2 = 1e-05
# 0.9266, lr = 0.0001, L2 = 0.0001
# 0.9266, lr = 0.0001, L2 = 0.001
# 0.9268, lr = 0.0001, L2 = 0.01
# 0.9262, lr = 0.0001, L2 = 0.1
# 0.9220, lr = 0.0001, L2 = 1.0
# 0.9267, lr = 0.0002, L2 = 0.0
# 0.9267, lr = 0.0002, L2 = 1e-06
# 0.9266, lr = 0.0002, L2 = 1e-05
# 0.9266, lr = 0.0002, L2 = 0.0001
# 0.9276, lr = 0.0002, L2 = 0.001
# 0.9264, lr = 0.0002, L2 = 0.01
# 0.9262, lr = 0.0002, L2 = 0.1
# 0.9218, lr = 0.0002, L2 = 1.0
# 0.9281, lr = 0.0005, L2 = 0.0
# 0.9281, lr = 0.0005, L2 = 1e-06
# 0.9282, lr = 0.0005, L2 = 1e-05
# 0.9280, lr = 0.0005, L2 = 0.0001
# 0.9278, lr = 0.0005, L2 = 0.001
# 0.9274, lr = 0.0005, L2 = 0.01
# 0.9264, lr = 0.0005, L2 = 0.1
# 0.9212, lr = 0.0005, L2 = 1.0
# 0.9276, lr = 0.001, L2 = 0.0
# 0.9277, lr = 0.001, L2 = 1e-06
# 0.9277, lr = 0.001, L2 = 1e-05
# 0.9277, lr = 0.001, L2 = 0.0001
# 0.9281, lr = 0.001, L2 = 0.001
# 0.9271, lr = 0.001, L2 = 0.01
# 0.9260, lr = 0.001, L2 = 0.1
# 0.9224, lr = 0.001, L2 = 1.0
# 0.9299, lr = 0.002, L2 = 0.0
# 0.9293, lr = 0.002, L2 = 1e-06
# 0.9292, lr = 0.002, L2 = 1e-05
# 0.9297, lr = 0.002, L2 = 0.0001
# 0.9292, lr = 0.002, L2 = 0.001
# 0.9291, lr = 0.002, L2 = 0.01
# 0.9281, lr = 0.002, L2 = 0.1
# 0.9232, lr = 0.002, L2 = 1.0
# 0.9294, lr = 0.005, L2 = 0.0
# [*] 0.9301, lr = 0.005, L2 = 1e-06
# 0.9294, lr = 0.005, L2 = 1e-05
# 0.9293, lr = 0.005, L2 = 0.0001
# 0.9294, lr = 0.005, L2 = 0.001
# 0.9299, lr = 0.005, L2 = 0.01
# 0.9277, lr = 0.005, L2 = 0.1
# 0.9227, lr = 0.005, L2 = 1.0
# 0.9274, lr = 0.01, L2 = 0.0
# 0.9266, lr = 0.01, L2 = 1e-06
# 0.9276, lr = 0.01, L2 = 1e-05
# 0.9286, lr = 0.01, L2 = 0.0001
# 0.9274, lr = 0.01, L2 = 0.001
# 0.9291, lr = 0.01, L2 = 0.01
# 0.9261, lr = 0.01, L2 = 0.1
# 0.9201, lr = 0.01, L2 = 1.0
In [ ]:
logregs = []
for i, n_components in enumerate(xrange(301, 401, 20)):
pca_full = load_model('models/pca_full.json')
pca_full.set_params(n_components=n_components, whiten=False)
X, y = load_mnist(mode='train', path='data/')
X /= 255.
X = pca_full.transform(X)
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=0.85)
y = one_hot(y)
logreg = LogisticRegression(n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=500,
learning_rate=1e-3,
plot=False)
)
logregs.append(logreg)
# logregs[i].set_params(optimizer_params=dict(max_epochs=100, learning_rate=1e-3, plot=False))
logreg.fit(X[train], y[train], X_val=X[test], y_val=y[test])
y_pred = logreg.predict(X[test])
print "PCA {0} --- {1:.4f}".format(n_components, accuracy_score(y_pred, y[test]))
# W/O whitening | with
# ---------------------------
# PCA 15 --- 0.8441 |
# PCA 20 --- 0.8783 |
# PCA 25 --- 0.8874 |
# PCA 30 --- 0.8936 | 0.8931
# PCA 35 --- 0.9027 | 0.9029
# PCA 40 --- 0.9056 | 0.9051
# PCA 45 --- 0.9076 | 0.9077
# PCA 50 --- 0.9087 | 0.9083
# PCA 55 --- 0.9132 | 0.9134
# PCA 60 --- 0.9129 | 0.9129
# PCA 65 --- 0.9133 | 0.9129
# PCA 70 --- 0.9176 | 0.9173
# PCA 75 --- 0.9189 | 0.9186
# PCA 80 --- 0.9206 | 0.9200
# PCA 85 --- 0.9207 | 0.9207
# PCA 90 --- 0.9213 | 0.9212
# PCA 95 --- 0.9203 | 0.9198
# PCA 100 --- 0.9184 | 0.9188
# PCA 105 --- 0.9203 | 0.9198
# PCA 110 --- 0.9209 | 0.9202
# PCA 115 --- 0.9210 | 0.9209
# PCA 120 --- 0.9217 | 0.9212
# PCA 125 | 0.9228 [*]
# PCA 130 | 0.9210
# PCA 135 | 0.9220
# PCA 140 | 0.9211
# PCA 145 | 0.9202
# PCA 150 | 0.9208
# PCA 155 | 0.9223
# PCA 165 --- 0.9210
# PCA 170 --- 0.9207
# PCA 175 --- 0.9214
# PCA 180 --- 0.9211
# PCA 185 --- 0.9208
# PCA 190 --- 0.9208
# PCA 195 --- 0.9204
# PCA 200 --- 0.9208
# ...
# PCA 220 --- 0.9209
# PCA 230 --- 0.9214
# PCA 240 --- 0.9207
# ...
# PCA 301 --- 0.9207
# PCA 321 --- 0.9204
# ...
In [4]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
X = X.astype(np.float32)
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)
aug.add('RandomRotate', angle=(-5., 7.))
aug.add('RandomGaussian', sigma=(0., 0.5))
aug.add('RandomShift', x_shift=(-1, 1), y_shift=(-1, 1))
aug.add('Dropout', p=(0., 0.2))
X_aug = aug.transform(X, 4)
y_aug = np.repeat(y, 5)
y_aug = one_hot(y_aug)
np.save('data/X_aug_logreg.npy', X_aug)
np.save('data/y_aug_logreg.npy', y_aug)
In [5]:
X = np.load('data/X_aug_logreg.npy')
y = np.load('data/y_aug_logreg.npy')
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=29./30.)
In [ ]:
X = np.load('data/X_aug_logreg.npy')#[:25000]
y = np.load('data/y_aug_logreg.npy')#[:25000]
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=29./30.)
for lr in reversed([1e-2, 1e-3, 1e-4, 1e-5, 1e-6]):
for L2 in (1e-8, 1e-6, 1e-4, 1e-2, 1.):
plot = (L2 == 1e-8)
logreg = LogisticRegression(L2=L2,
n_batches=64,
# n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=800,
# max_epochs=20,
early_stopping=50,
learning_rate=lr,
plot=plot,
plot_dirpath='learning_curves_logreg_{0}/'.format(lr)
))
logreg.fit(X[train], y[train], X_val=X[test], y_val=y[test])
acc = logreg.evaluate(X[test], y[test])
print "{0:.4f}, lr = {1}, L2 = {2}".format(acc, lr, L2)
s = '{0:.4f}'.format(acc).replace('.', '_')
t = 'models/logreg/logreg_{0}_{1}_{2}.json'.format(s, lr, L2)
logreg.save(t)
logreg_loaded = load_model(t)#.fit([[0.]], [[1]])
print "{0:.4f}".format(logreg.evaluate(X[test], y[test]))
# 0.7843, lr = 1e-06, L2 = 1e-08
# 0.7843, lr = 1e-06, L2 = 1e-06
# 0.7843, lr = 1e-06, L2 = 0.0001
# 0.7843, lr = 1e-06, L2 = 0.01
# 0.7855, lr = 1e-06, L2 = 1.0
# 0.8754, lr = 1e-05, L2 = 1e-08
# 0.8754, lr = 1e-05, L2 = 1e-06
# ...
# 0.8805, lr = 1e-4, L2 whatever
# ...
# 0.86.., lr = 1e-3, L2 whatever
In [15]:
pca_full = load_model('models/pca_full.json')
def load_big2():
X, y = load_mnist(mode='train', path='data/')
X_scaled = X / 255. # only divide by 255
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, test = tts.split(y, train_ratio=50005./60000., stratify=True)
return X_scaled[train], y[train], X_scaled[test], y[test] # 49999 train, 10001 val
X_train_orig, y_train, X_test_orig, y_test = load_big2()
y_train = one_hot(y_train)
y_test = one_hot(y_test)
In [ ]:
pca_full.set_params(n_components=35, whiten=True)
z = pca_full.explained_variance_ratio_[:35]
z /= sum(z)
for alpha in (1e-6, 1e-4, 1e-2, 0.1, 1., 2., 5., 10., 16., 25., 100.):
print "alpha =", alpha
X_train = pca_full.transform(X_train_orig)
X_test = pca_full.transform(X_test_orig)
X_train *= np.exp(alpha * z)
X_test *= np.exp(alpha * z)
logreg = LogisticRegression(L2=1e-6,
n_batches=10,
random_seed=1337,
optimizer_params=dict(
max_epochs=600,
early_stopping=50,
# verbose=True,
learning_rate=0.005,
plot=False)
)
logreg.fit(X_train, y_train, X_val=X_test, y_val=y_test)
print logreg.evaluate(X_test, y_test)
# alpha = 1e-06
# 0.90800919908
# alpha = 0.0001
# 0.90800919908
# alpha = 0.01
# 0.90800919908
# alpha = 0.1
# 0.90800919908
# alpha = 1.0
# 0.90800919908
# alpha = 2.0
# 0.908109189081
# alpha = 5.0
# 0.907709229077
# alpha = 10.0
# 0.907809219078
# alpha = 16.0
# 0.907209279072
# alpha = 25.0
# 0.906409359064
# alpha = 100.0
# 0.505749425057
In [ ]:
X_train = np.load('data/train_feats.npy')
_, y_train = load_mnist('train', 'data/')
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, val = tts.split(y_train, train_ratio=50005./60000., stratify=True) # 49999 : 10001
param_grid = dict(
L2=[0] + np.logspace(-4., 1., 11).tolist(),
)
logreg_params = dict(n_batches=32,
random_seed=1337,
optimizer_params=dict(
max_epochs=750,
learning_rate=0.001,
early_stopping=50,
plot=False,
verbose=False
))
for params in GridSearchCV(param_grid=param_grid).gen_params():
logreg = LogisticRegression(**logreg_params).set_params(**params)
logreg.fit(X_train[train], one_hot(y_train[train]), X_val=X_train[val], y_val=one_hot(y_train[val]))
acc = logreg.evaluate(X_train[val], one_hot(y_train[val]))
print "{0:.5f} at {1}".format(acc, val_acc, params)
# (Sorted)
# 0.99590 val at {'learning_rate': 0.005, 'L2': 3.1622776601683795}
# 0.99610 val at {'learning_rate': 0.005, 'L2': 0.31622776601683794}
# 0.99710 val at {'learning_rate': 0.005, 'L2': 0.0031622776601683794}
# 0.99710 val at {'learning_rate': 0.005, 'L2': 10.0}
# 0.99730 val at {'learning_rate': 0.005, 'L2': 0.031622776601683791}
# 0.99760 val at {'learning_rate': 0.005, 'L2': 0.0}
# 0.99770 val at {'learning_rate': 0.005, 'L2': 0.0001}
# 0.99780 val at {'learning_rate': 0.005, 'L2': 0.001}
# 0.99780 val at {'learning_rate': 0.005, 'L2': 0.01}
# 0.99790 val at {'learning_rate': 0.005, 'L2': 0.1}
# 0.99790 val at {'learning_rate': 0.005, 'L2': 1.0}
# [*] 0.99810 val at {'learning_rate': 0.005, 'L2': 0.00031622776601683794}
In [ ]:
X, y = load_mnist('train', 'data/')
indices, _ = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=4.005/60., stratify=True)
X = X[indices]
X = X[:4000]
X /= 255.
param_grid = dict(
n_hidden=[128, 256, 384],
learning_rate=[0.05, 0.01, 0.005, '0.05->0.005', '0.01->0.001'],
k=[1, 4],
random_seed=[1337, 42],
)
rbm = RBM(persistent=True,
n_epochs=40,
early_stopping=12,
momentum='0.5->0.99',
batch_size=10,
verbose=False)
done = 0
for thr in (False, True):
if thr:
X = (X > 0.5).astype(np.float32)
for params in GridSearchCV(param_grid=param_grid).gen_params(): # 60 combinations
done += 1
rbm.reset_params().set_params(**params)
rbm.fit(X)
mse = rbm.best_recon
dirpath = 'tmp/rbm_ge0.5/' if thr else 'tmp/rbm/'
rbm.save(dirpath + '{0:.5f}.json'.format(mse))
print "mse {0:.5f} [{1}/120] at {2}!".format(mse, done, params)
# (Sorted)
# [*] mse 0.06684 [25/120] at {'k': 1, 'random_seed': 1337, 'learning_rate': '0.01->0.001', 'n_hidden': 256}!
# ...
In [3]:
rbm = load_model('models/rbm.json')
In [4]:
plot_rbm_filters(rbm.best_W)
plt.savefig('rbm_filters.png')
In [13]:
# non-random nudging in all directions
X, y = load_mnist('train', 'data/')
X /= 255.
indices, _ = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=4.005/60., stratify=True)
X = X[indices]
X = X[:4000]
X_aug = []
for x in X:
X_aug.append(x)
for t in RandomAugmentator(transform_shape=(28, 28), out_shape=(784,))\
.add('RandomShift', x_shift=(-1, -1), y_shift=( 0, 0))\
.transform_x(x, 1):
X_aug.append(t)
for t in RandomAugmentator(transform_shape=(28, 28), out_shape=(784,))\
.add('RandomShift', x_shift=( 1, 1), y_shift=( 0, 0))\
.transform_x(x, 1):
X_aug.append(t)
for t in RandomAugmentator(transform_shape=(28, 28), out_shape=(784,))\
.add('RandomShift', x_shift=( 0, 0), y_shift=( 1, 1))\
.transform_x(x, 1):
X_aug.append(t)
for t in RandomAugmentator(transform_shape=(28, 28), out_shape=(784,))\
.add('RandomShift', x_shift=( 0, 0), y_shift=(-1, -1))\
.transform_x(x, 1):
X_aug.append(t)
X_aug = np.asarray(X_aug)
np.save('data/X_rbm_small.npy', X_aug)
In [6]:
X_aug = np.load('data/X_rbm_small.npy')
In [ ]:
param_grid = dict(
learning_rate=['0.01->0.005', '0.05->0.001', '0.05->0.005', '0.01->0.001'],
batch_size=[5, 10, 20, 40],
random_seed=[1337, 42],
)
rbm = RBM(n_hidden=256,
k=1,
persistent=True,
n_epochs=60,
early_stopping=12,
momentum='0.5->0.99',
verbose=True)
done = 0
GS = GridSearchCV
for params in GS(param_grid=param_grid).gen_params():
done += 1
if done <= 16:
continue
rbm.reset_params().set_params(**params)
rbm.fit(X_aug)
mse = rbm.best_recon
rbm.save('tmp/rbm_{0:.5f}.json'.format(mse))
print "mse {0:.5f} [{1}/40] at {2}!".format(mse, done, params)
# (Sorted:)
# [*] mse 0.06809 [19/40] at {'learning_rate': '0.05->0.001', 'random_seed': 1337, 'batch_size': 20}!
# ...
In [ ]:
rbm = load_model('models/rbm.json')
X, _ = load_mnist('train', 'data/')
X /= 255.
F = np.dot(X, rbm.best_W) + rbm.hb # rbm.propup(X)
# F.min(), F.max(), F.mean() --> -3773.89447221 2.30920675476 -140.968359014
F = StandardScaler().fit_transform(F)
np.save('data/rbm_train.npy', F)
In [ ]:
X_train = np.load('data/rbm_train.npy')
_, y_train = load_mnist('train', 'data/')
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
train, val = tts.split(y_train, train_ratio=50005./60000., stratify=True) # 49999 : 10001
param_grid = dict(
L2=np.logspace(-6., 1., 15),
)
logreg_params = dict(n_batches=32,
random_seed=1337,
optimizer_params=dict(
max_epochs=750,
learning_rate=0.001,
early_stopping=50,
plot=False,
verbose=False
))
for params in GridSearchCV(param_grid=param_grid).gen_params():
logreg = LogisticRegression(**logreg_params).set_params(**params)
logreg.fit(X_train[train], one_hot(y_train[train]), X_val=X_train[val], y_val=one_hot(y_train[val]))
acc = logreg.evaluate(X_train[val], one_hot(y_train[val]))
print "{0:.5f} at {1}".format(acc, val_acc, params)
# 0.91800 test 0.92251 val at {'L2': 9.9999999999999995e-07}
# 0.91760 test 0.92241 val at {'L2': 3.1622776601683792e-06}
# ... D:
In [7]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
X = X.astype(np.float32)
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)
aug.add('RandomRotate', angle=(-5., 7.))
aug.add('RandomGaussian', sigma=(0., 0.5))
aug.add('RandomShift', x_shift=(-1, 1), y_shift=(-1, 1))
X_aug = aug.transform(X, 4)
y_aug = np.repeat(y, 5)
y_aug = one_hot(y_aug)
np.save('data/X_aug_nn.npy', X_aug)
np.save('data/y_aug_nn.npy', y_aug)
In [2]:
X = np.load('data/X_aug_nn.npy')
y = np.load('data/y_aug_nn.npy')
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=29./30.)
In [ ]:
print "Loading data ..."
X = np.load('data/X_aug_nn.npy')#[:30000]
y = np.load('data/y_aug_nn.npy')#[:30000]
train, test = TrainTestSplitter(shuffle=True, random_seed=1337).split(y, train_ratio=29./30.)
nn = NNClassifier(layers=[
FullyConnected(512),
Activation('leaky_relu'),
FullyConnected(256),
Activation('leaky_relu'),
FullyConnected(128),
Activation('leaky_relu'),
FullyConnected(32),
Activation('leaky_relu'),
FullyConnected(10),
Activation('softmax')
],
n_batches=1024,
shuffle=True,
random_seed=1337,
optimizer_params=dict(
max_epochs=100,
early_stopping=20,
verbose=True,
plot=True,
plot_dirpath='learning_curves_NN/',
learning_rate=1e-4
))
print "Initializing NN ..."
nn.fit(X[train], y[train], X_val=X[test], y_val=y[test])
print nn.evaluate(X[train], y[train], 'accuracy_score')
# 1) validation accuracy --> 0.9929
# 2) 512-256-128-32-10 Dropout(0.1) --> 0.9906
# 3) 512-256-128-32-10 Dropout(0.2) --> 0.9897
# 4) 600-300-128-32-10 --> 0.9879
# 5) 600-300-128-32-10 Dropout(0.1) --> 0.9914
# 6) 600-300-128-32-10 Dropout(0.12) --> 0.9895
# 7) 800-400-200-100-10 Dropout(0.12) --> 0.9929
# 8) 1024-512-256-128-10 Dropout(0.12) --> 0.9944
# 9) 1024-D.05-768-D.1-256-128-10 --> 0.9905
# 10.a) 1024-768-256-128-10 Dropout(0.1) --> 0.9923
# 10.b) 1024-768-256-128-10 Dropout(0.2) --> 0.9892
# 10.c) 1024-768-256-128-10 Dropout(1/4) --> 0.9857
# 10.d) 1024-768-256-128-10 Dropout(0.5) --> 0.9686
# (...)
In [ ]:
X, y = load_mnist(mode='train', path='data/')
X = X / 255.
X = X.astype(np.float32)
tts = TrainTestSplitter(shuffle=False, random_seed=1337)
train, val = tts.split(y, train_ratio=55005.98/60000., stratify=True) # 55k : 5k
X_train, y_train, X_val, y_val = X[train], y[train], X[val], y[val]
y_val = one_hot(y_val)
np.save('data/nn_X_val.npy', X_val)
np.save('data/nn_y_val.npy', y_val)
aug = RandomAugmentator(transform_shape=(28, 28), random_seed=1337)
aug.add('RandomRotate', angle=(-5., 7.))
aug.add('RandomGaussian', sigma=(0., 0.5))
aug.add('RandomShift', x_shift=(-1, 1), y_shift=(-1, 1))
X_train = aug.transform(X_train, 4)
y_train = np.repeat(y_train, 5)
y_train = one_hot(y_train)
np.save('data/nn_X_train.npy', X_train)
np.save('data/nn_y_train.npy', y_train)
In [2]:
# 1.a) 1024-D.05-768-D.1-256-128-10 --> 0.9880
# 1.b) 1024-D.05-768-D.05-256-128-10 --> 0.9868
# 1.c) 1024-768-256-128-10 --> 0.9896
# 2) 1000-800-800-500-250-10 --> 0.9824
# ... --> 0.9838
# (...)
# WORSE!
In [ ]:
# 11) 800-1024-512-256-128 --> 0.9933
# 12) 1337-911-666-128 --> 0.9923
# 13) 800-D.05-1024-D.1-512-256-128 --> 0.9936
# 14) 800-D.05-1024-D.1-512-D.1-256-128 --> 0.9928
# 15) 1337-D.05-911-D.1-666-128 --> 0.9939
# [*] 16) 1337-D.05-911-D.1-666-333-128 --> 0.9948
# 17) 1337-D.1-911-D.2-666-333-128 --> 0.9887
# 18) ... --> 0.9930
# 19) ... --> 0.9935
# 20) 2048-D.1-1337-D.2-666-333 --> 0.9896
# 21) 2048-D.15-1337-D.25-666-333 --> 0.9723
# 22) 2048-D.05-1337-D.1-666-333 --> 0.9936
# 23) 2048-D.1-1337-D.2-666-333-128 --> 0.9892
In [8]:
X, y = load_mnist('train', 'data/')
X /= 255.
y = one_hot(y)
gp = GPClassifier(algorithm='exact')
gp
Out[8]:
In [ ]:
gp.reset_K()
with Stopwatch(verbose=True):
gp.fit(X[:10], y[:10])
gp.reset_K()
with Stopwatch(verbose=True):
gp.fit(X[:100], y[:100])
gp.reset_K()
with Stopwatch(verbose=True):
gp.fit(X[:1000], y[:1000])
gp.reset_K()
with Stopwatch(verbose=True):
gp.fit(X[:2000], y[:2000])
# Elapsed time: 0.046 sec
# Elapsed time: 0.518 sec
# Elapsed time: 59.686 sec
# Elapsed time: 298.424 sec
In [ ]:
gp = GPClassifier(algorithm='cg')
gp.reset_K()
with Stopwatch(verbose=True):
    gp.fit(X[:10], y[:10])
gp.reset_K()
with Stopwatch(verbose=True):
    gp.fit(X[:100], y[:100])
gp.reset_K()
with Stopwatch(verbose=True):
    gp.fit(X[:1000], y[:1000])
gp.reset_K()
with Stopwatch(verbose=True):
    gp.fit(X[:2000], y[:2000])
# Elapsed time: 0.044 sec
# Elapsed time: 0.262 sec
# Elapsed time: 50.412 sec
# Elapsed time: 259.823 sec
In [2]:
sigma_n = np.concatenate(([0], np.logspace(-8., -4., 2)))
length_scale = np.logspace(-1., 2., 19)
gamma = 0.5/length_scale**2
# sigma_f = np.logspace(-2., 2., 7)
param_grid = ({'sigma_n': sigma_n,
'kernel_params': [dict(sigma=1., gamma=gamma_) for gamma_ in gamma]},
{'sigma_n': sigma_n,
'kernel_params': [dict(sigma=0.1, gamma=gamma_) for gamma_ in gamma]},
{'sigma_n': sigma_n,
'kernel_params': [dict(sigma=10., gamma=gamma_) for gamma_ in gamma]})
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg', random_seed=1337, tol=1e-8, cg_tol=1e-7, n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations()
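The gamma grid above is just the length_scale grid re-parametrised: assuming the kernel behind kernel_params=dict(sigma=..., gamma=...) is the usual squared-exponential k(x, x') = sigma^2 * exp(-gamma * ||x - x'||^2), then gamma = 1 / (2 * length_scale^2), which is exactly what the 0.5/length_scale**2 line computes. A minimal sketch of that assumed kernel form:
In [ ]:
# assumed form of the RBF kernel behind the (sigma, gamma) parametrisation above;
# with gamma = 0.5 / length_scale**2 this equals sigma**2 * exp(-d2 / (2 * length_scale**2))
def rbf_kernel(x, z, sigma=1., gamma=0.5):
    d2 = np.sum((np.asarray(x) - np.asarray(z)) ** 2)
    return sigma ** 2 * np.exp(-gamma * d2)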
In [3]:
[params for params in grid_cv.gen_params()][:3]
Out[3]:
In [4]:
X, y = load_mnist(mode='train', path='data/')
X /= 255.
st = StandardScaler(copy=False, with_mean=True, with_std=False)
X = st.fit_transform(X)
tts = TrainTestSplitter(random_seed=1337, shuffle=True)
indices, _ = tts.split(y, train_ratio=0.02, stratify=True) # 1195 samples
X = X[indices]
y = y[indices]
In [ ]:
grid_cv.fit(X, y);
# Training GPClassifier on 1195 samples x 784 features.
# 2-fold CV for each of 171 params combinations == 342 fits ...
# iter: 1/342 +- elapsed: 21.159 sec ...
# iter: 2/342 ++ elapsed: 35.444 sec - mean acc.: 0.1113 +/- 2 * 0.014
# iter: 3/342 +- elapsed: 49.669 sec - best acc.: 0.1113 at {'kernel_params': {'sigma': 1.0, 'gamma': 49.999999999999993}, 'sigma_n': 0.0}
# ...
# ...
# ...
# iter: 340/342 ++convergence is not reached
# elapsed: 16914.8 sec - mean acc.: 0.1046 +/- 2 * 0.001
# iter: 341/342 +-convergence is not reached
# elapsed: 17005.8 sec - best acc.: 0.6686 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.049999999999999989}, 'sigma_n': 0.0}
# iter: 342/342 ++convergence is not reached
# elapsed: 17083.8 sec - mean acc.: 0.1046 +/- 2 * 0.001
In [7]:
df = grid_cv.to_df()
df.to_excel('cv_results/gp_raw_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(25).to_excel('cv_results/gp_raw_best.xlsx')
In [26]:
pca_full = load_model('models/pca_full.json')
X, y = load_mnist(mode='train', path='data/')
X /= 255.
# st = StandardScaler(copy=False, with_mean=True, with_std=False)
# X = st.fit_transform(X)
tts = TrainTestSplitter(random_seed=1337, shuffle=True)
indices, _ = tts.split(y, train_ratio=0.02, stratify=True) # 1195 samples
X = X[indices]
y = y[indices]
In [ ]:
# for n_components in xrange(5, 151, 5):
for n_components in xrange(12, 25):
    gamma = np.array([0.3, 0.6, 1.5, 3.0, 5.1]) / n_components
    param_grid = {'sigma_n': [1e-8],
                  'kernel_params': [dict(sigma=0.1, gamma=gamma_) for gamma_ in gamma]}
    grid_cv_params = dict(model=GPClassifier(algorithm='cg', random_seed=1337, tol=1e-8, cg_tol=1e-7, n_samples=1500),
                          param_grid=param_grid,
                          train_test_splitter_params=dict(shuffle=True, random_seed=1337),
                          n_splits=2,
                          refit=True,
                          verbose=True)
    print "[PCA n_components = {0}]\n\n".format(n_components)
    X_current = pca_full.set_params(n_components=n_components, whiten=False).transform(X)
    grid_cv = GridSearchCV(**grid_cv_params).fit(X_current, y)
    df = grid_cv.to_df().sort_values(by='mean_score', ascending=False)
    df.to_excel('cv_results/gp_pca_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv.best_score_))
    print "\n\n"
# [PCA n_components = 5]
# Training GPClassifier on 1195 samples x 5 features.
# 2-fold CV for each of 5 params combinations == 10 fits ...
# iter: 1/10 +- elapsed: 11.139 sec ...
# iter: 2/10 ++ elapsed: 22.108 sec - mean acc.: 0.5940 +/- 2 * 0.040
# iter: 3/10 +- elapsed: 32.947 sec - best acc.: 0.5940 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.059999999999999998}, 'sigma_n': 1e-08}
# iter: 4/10 ++ elapsed: 41.796 sec - mean acc.: 0.6384 +/- 2 * 0.034
# iter: 5/10 +- elapsed: 49.648 sec - best acc.: 0.6384 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.12}, 'sigma_n': 1e-08}
# iter: 6/10 ++ elapsed: 56.744 sec - mean acc.: 0.6728 +/- 2 * 0.018
# iter: 7/10 +- elapsed: 63.334 sec - best acc.: 0.6728 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.29999999999999999}, 'sigma_n': 1e-08}
# iter: 8/10 ++ elapsed: 70.164 sec - mean acc.: 0.6410 +/- 2 * 0.012
# iter: 9/10 +- elapsed: 75.789 sec - best acc.: 0.6728 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.29999999999999999}, 'sigma_n': 1e-08}
# iter: 10/10 ++ elapsed: 81.808 sec - mean acc.: 0.5172 +/- 2 * 0.003
# ...
# ...
# ...
# iter: 8/10 ++ elapsed: 80.497 sec - mean acc.: 0.7422 +/- 2 * 0.032
# iter: 9/10 +- elapsed: 85.950 sec - best acc.: 0.7481 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.042857142857142858}, 'sigma_n': 1e-08}
# iter: 10/10 ++ elapsed: 91.393 sec - mean acc.: 0.5288 +/- 2 * 0.012
# [PCA n_components = 40]
# Training GPClassifier on 1195 samples x 40 features.
# 2-fold CV for each of 5 params combinations == 10 fits ...
# iter: 1/10 +- elapsed: 14.542 sec ...
# iter: 2/10 ++ elapsed: 28.153 sec - mean acc.: 0.5832 +/- 2 * 0.016
# iter: 3/10 +- elapsed: 39.689 sec - best acc.: 0.5832 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.0074999999999999997}, 'sigma_n': 1e-08}
In [ ]:
n_components = 20
whiten = False
X = pca_full.set_params(n_components=n_components, whiten=whiten).transform(X)
X = StandardScaler(copy=False, with_mean=True, with_std=False).fit_transform(X)
sigma_n = [0., 1e-8, 1e-6, 1e-4, 1e-2]
sigma_f = np.logspace(-2., 1., 6)
gamma = np.linspace(0.04, 0.12, 16, True)
param_grid = [{'sigma_n': sigma_n, 'kernel_params': [dict(sigma=sigma, gamma=gamma_) for gamma_ in gamma]} for sigma in sigma_f]
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg',
random_seed=1337,
max_iter=200,
tol=1e-8,
cg_tol=1e-7,
n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations() # 480
grid_cv.fit(X, y);
# Training GPClassifier on 1195 samples x 20 features.
# 2-fold CV for each of 480 params combinations == 960 fits ...
# iter: 1/960 +- elapsed: 1.409 sec ...
# iter: 2/960 ++ elapsed: 2.614 sec - mean acc.: 0.6368 +/- 2 * 0.023
# iter: 3/960 +- elapsed: 3.875 sec - best acc.: 0.6368 at {'kernel_params': {'sigma': 0.01, 'gamma': 0.040000000000000001}, 'sigma_n': 0.0}
# ...
# ...
# ...
# iter: 958/960 ++convergence is not reached
# elapsed: 9239.23 sec - mean acc.: 0.2006 +/- 2 * 0.075
# iter: 959/960 +-convergence is not reached
# elapsed: 9253.45 sec - best acc.: 0.8677 at {'kernel_params': {'sigma': 0.63095734448019303, 'gamma': 0.082666666666666666}, 'sigma_n': 0.0}
# iter: 960/960 ++convergence is not reached
# elapsed: 9267.46 sec - mean acc.: 0.7169 +/- 2 * 0.094
In [30]:
# df = grid_cv.to_df()
# df.to_excel('cv_results/gp_2_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(64).to_excel('cv_results/gp_2_best.xlsx')
In [ ]:
for n_components in xrange(5, 151, 5):
    gamma = np.array([0.3, 0.6, 1.5, 3.0, 5.1]) / n_components
    param_grid = {'sigma_n': [1e-8],
                  'kernel_params': [dict(sigma=0.1, gamma=gamma_) for gamma_ in gamma]}
    grid_cv_params = dict(model=GPClassifier(algorithm='cg', random_seed=1337, tol=1e-8, cg_tol=1e-7, n_samples=1500),
                          param_grid=param_grid,
                          train_test_splitter_params=dict(shuffle=True, random_seed=1337),
                          n_splits=2,
                          refit=True,
                          verbose=True)
    print "[PCA n_components = {0}]\n\n".format(n_components)
    X_current = pca_full.set_params(n_components=n_components, whiten=True).transform(X)
    grid_cv = GridSearchCV(**grid_cv_params).fit(X_current, y)
    df = grid_cv.to_df().sort_values(by='mean_score', ascending=False)
    df.to_excel('cv_results/gp_pca_whiten_{0}_{1:.4f}.xlsx'.format(n_components, grid_cv.best_score_))
    print "\n\n"
# the best is 0.79.. <-- worse
In [ ]:
pca_full = load_model('models/pca_full.json')
X, y = load_mnist(mode='train', path='data/')
X /= 255.
tts = TrainTestSplitter(random_seed=1337, shuffle=True)
indices, _ = tts.split(y, train_ratio=0.03, stratify=True) # 1794 samples
X = X[indices]
y = y[indices]
X = pca_full.set_params(n_components=20, whiten=True).transform(X)
X = StandardScaler(copy=False, with_mean=True, with_std=False).fit_transform(X)
z = pca_full.explained_variance_ratio_[:20].copy()  # copy so the in-place division below does not touch pca_full
z /= sum(z)
train, test = tts.split(y, train_ratio=0.5, stratify=True)
# for alpha in np.logspace(-6., 2., 9):
# for alpha in np.logspace(-3., 1.2, 9):
# for alpha in np.arange(1.4, 9.8, 0.4):
# for alpha in np.arange(5.4, 6.6, 0.1):
for alpha in np.arange(6.05, 6.15, 0.01):
    X_train = X[train] * np.exp(alpha * z)
    X_test = X[test] * np.exp(alpha * z)
    gp = GPClassifier(algorithm='cg',
                      sigma_n=1e-8,
                      kernel_params=dict(sigma=0.1, gamma=0.075),
                      n_samples=1500,
                      tol=1e-7,
                      max_iter=200,
                      random_seed=1337,
                      cg_tol=1e-7)
    gp.fit(X_train, y[train])
    acc = gp.evaluate(X_test, y[test])
    print "{0:.4f}, alpha = {1}".format(acc, alpha)
# 0.8122, alpha = 0.01
# 0.8111, alpha = 0.1
# 0.8211, alpha = 1.0
# -----
# 0.8111, alpha = 0.125892541179
# 0.8122, alpha = 0.421696503429
# 0.8244, alpha = 1.41253754462
# 0.8511, alpha = 4.73151258961
# 0.4056, alpha = 15.8489319246
# -----
# 0.8478, alpha = 4.6
# 0.8500, alpha = 5.0
# 0.8433, alpha = 5.4
# 0.8578, alpha = 5.8
# 0.8544, alpha = 6.2
# 0.8500, alpha = 6.6
# ----
# 0.8578, alpha = 5.9
# 0.8578, alpha = 6.0
# 0.8589, alpha = 6.1
# 0.8544, alpha = 6.2
# ----
# 0.8556, alpha = 6.08
# 0.8589, alpha = 6.09
# 0.8589, alpha = 6.1
# 0.8556, alpha = 6.11
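The np.exp(alpha * z) factor above rescales each of the 20 PCA components by the exponential of its normalised explained-variance ratio, so directions that carry more variance get stretched more before the RBF kernel is applied. A small illustration, reusing z from the cell above:
In [ ]:
# illustration of the component re-weighting: component i is scaled by exp(alpha * z_i),
# where z_i is its normalised explained-variance ratio (the z computed above)
alpha = 6.1
w = np.exp(alpha * z)
print w.min(), w.max()   # trailing components stay close to 1, leading ones are boosted by a factor of a few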
In [18]:
pca_full = load_model('models/pca_full.json')
X, y = load_mnist(mode='train', path='data/')
X /= 255.
# st = StandardScaler(copy=False, with_mean=True, with_std=False)
# X = st.fit_transform(X)
tts = TrainTestSplitter(random_seed=1337, shuffle=True)
indices, _ = tts.split(y, train_ratio=0.02, stratify=True) # 1195 samples
X = X[indices]
y = y[indices]
In [ ]:
n_components = 20
whiten = True
X = pca_full.set_params(n_components=n_components, whiten=whiten).transform(X)
alpha = 6.1
z = pca_full.explained_variance_ratio_[:20].copy()  # copy so the in-place division below does not touch pca_full
z /= sum(z)
X *= np.exp(alpha * z)
X = StandardScaler(copy=False, with_mean=True, with_std=False).fit_transform(X)
sigma_n = [0., 1e-4, 1e-2]
sigma_f = [0.1, 0.3, 0.5, 0.7, 0.9]
gamma = np.linspace(0.08, 0.11, 7, True)
param_grid = [{'sigma_n': sigma_n, 'kernel_params': [dict(sigma=sigma, gamma=gamma_) for gamma_ in gamma]} for sigma in sigma_f]
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg',
random_seed=1337,
max_iter=200,
tol=1e-8,
cg_tol=1e-7,
n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations() # 105
grid_cv.fit(X, y);
# Training GPClassifier on 1195 samples x 20 features.
# 2-fold CV for each of 105 params combinations == 210 fits ...
# iter: 1/210 +-convergence is not reached
# elapsed: 9.902 sec ...
# iter: 2/210 ++convergence is not reached
# elapsed: 19.138 sec - mean acc.: 0.7798 +/- 2 * 0.030
# iter: 3/210 +-convergence is not reached
# elapsed: 28.945 sec - best acc.: 0.7798 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.080000000000000002}, 'sigma_n': 0.0}
# ...
# ...
# ...
# iter: 208/210 ++convergence is not reached
# elapsed: 2135.44 sec - mean acc.: 0.7606 +/- 2 * 0.022
# iter: 209/210 +-convergence is not reached
# elapsed: 2145.34 sec - best acc.: 0.8702 at {'kernel_params': {'sigma': 0.7, 'gamma': 0.080000000000000002}, 'sigma_n': 0.01}
# iter: 210/210 ++convergence is not reached
# elapsed: 2155.47 sec - mean acc.: 0.7615 +/- 2 * 0.022
In [12]:
df = grid_cv.to_df()
df.to_excel('cv_results/gp_3_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(64).to_excel('cv_results/gp_3_best.xlsx')
In [ ]:
n_components = 20
whiten = False
X = pca_full.set_params(n_components=n_components, whiten=whiten).transform(X)
X = StandardScaler(copy=False, with_mean=True, with_std=False).fit_transform(X)
sigma_n = [0., 1e-8, 1e-6]
l = np.logspace(-1., 2., 12)
alpha = np.logspace(0., 2., 5)
param_grid = [{'sigma_n': sigma_n,
'kernel_params': [dict(sigma=0.1, alpha=alpha_, l=l_) for alpha_ in alpha]} for l_ in l]
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg',
kernel='RationalQuadratic',
random_seed=1337,
max_iter=200,
tol=1e-8,
cg_tol=1e-7,
n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations() # 180
grid_cv.fit(X, y);
# Training GPClassifier on 1195 samples x 20 features.
# 2-fold CV for each of 180 params combinations == 360 fits ...
# iter: 1/360 +-convergence is not reached
# elapsed: 8.978 sec ...
# iter: 2/360 ++convergence is not reached
# elapsed: 17.204 sec - mean acc.: 0.1138 +/- 2 * 0.002
# iter: 3/360 +-convergence is not reached
# elapsed: 26.859 sec - best acc.: 0.1138 at {'kernel_params': {'alpha': 1.0, 'sigma': 0.1, 'l': 0.10000000000000001}, 'sigma_n': 0.0}
# ...
# ...
# ...
# iter: 358/360 ++convergence is not reached
# elapsed: 2948.18 sec - mean acc.: 0.1121 +/- 2 * 0.000
# iter: 359/360 +-convergence is not reached
# elapsed: 2959.30 sec - best acc.: 0.8025 at {'kernel_params': {'alpha': 100.0, 'sigma': 0.1, 'l': 2.3101297000831593}, 'sigma_n': 0.0}
# iter: 360/360 ++convergence is not reached
# elapsed: 2971.48 sec - mean acc.: 0.1121 +/- 2 * 0.000
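For reference, the (sigma, alpha, l) grid above corresponds to the usual RationalQuadratic kernel, k(x, x') = sigma^2 * (1 + ||x - x'||^2 / (2 * alpha * l^2))^(-alpha), which tends to the RBF kernel with length scale l as alpha goes to infinity; the exact form used inside GPClassifier is an assumption. A minimal sketch:
In [ ]:
# assumed form of the RationalQuadratic kernel behind the (sigma, alpha, l) grid above;
# as alpha -> infinity it approaches the RBF kernel with the same length scale l
def rq_kernel(x, z, sigma=0.1, alpha=1., l=1.):
    d2 = np.sum((np.asarray(x) - np.asarray(z)) ** 2)
    return sigma ** 2 * (1. + d2 / (2. * alpha * l ** 2)) ** (-alpha)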
In [20]:
df = grid_cv.to_df()
df.to_excel('cv_results/gp_rq_full.xlsx')
df.sort_values(by='mean_score', ascending=False).head(64).to_excel('cv_results/gp_rq_best.xlsx')
In [ ]:
X = np.load('data/train_feats.npy')
_, y = load_mnist('train', 'data/')
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
indices, _ = tts.split(y, train_ratio=1300./60000., stratify=True)
y = y[indices]
y = one_hot(y)
X = X[indices]
# sigma_n = [0., 1e-4, 1e-2]
sigma_n = [0, 1e-8, 1e-6]
# sigma_f = [0.1, 1., 10.]
# sigma_f = np.logspace(-1., 1., 5)
sigma_f = np.logspace(-0.9, -0.2, 5)
# length_scale = np.logspace(-1., 2., 19)
# gamma = 0.5/length_scale**2
# gamma = np.logspace(-4., -2.1, 19)
gamma = np.logspace(-3.7, -3., 11)
param_grid = [{'sigma_n': sigma_n,
'kernel_params': [dict(sigma=sigma, gamma=gamma_) for gamma_ in gamma]} for sigma in sigma_f]
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg',
random_seed=1337,
max_iter=200,
tol=1e-8,
cg_tol=1e-7,
n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations() # 3 sigma_n * 5 sigma_f * 11 gamma = 165
grid_cv.fit(X, y);
# Training GPClassifier on 1295 samples x 128 features.
# 2-fold CV for each of 171 params combinations == 342 fits ...
# iter: 1/342 +- elapsed: 3.584 sec ...
# ...
# ...
# ...
# iter: 226/342 ++ elapsed: 4405.15 sec - mean acc.: 0.1042 +/- 2 * 0.001
# iter: 227/342 +- elapsed: 4432.84 sec - best acc.: 0.9846 at {'kernel_params': {'sigma': 0.1, 'gamma': 0.00050000000000000001}, 'sigma_n': 0.0}
# iter: 228/342 ++ elapsed: 4460.72 sec - mean acc.: 0.1042 +/- 2 * 0.001
# --------------------------------------------------------------
# Training GPClassifier on 1295 samples x 128 features.
# 2-fold CV for each of 285 params combinations == 570 fits ...
# iter: 1/570 +- elapsed: 28.242 sec ...
# iter: 2/570 ++ elapsed: 51.745 sec - mean acc.: 0.9799 +/- 2 * 0.005
# ...
# ...
# ...
# iter: 418/570 ++ elapsed: 9589.25 sec - mean acc.: 0.5370 +/- 2 * 0.119
# iter: 419/570 +- elapsed: 9605.40 sec - best acc.: 0.9861 at {'kernel_params': {'sigma': 0.31622776601683794, 'gamma': 0.00033711476775509616}, 'sigma_n': 0.0}
# iter: 420/570 ++ elapsed: 9620.88 sec - mean acc.: 0.6041 +/- 2 * 0.119
# ---------------------------------------------------------------
# Training GPClassifier on 1096 samples x 128 features.
# 2-fold CV for each of 165 params combinations == 330 fits ...
# iter: 1/330 +-convergence is not reached
# elapsed: 21.402 sec ...
# iter: 2/330 ++convergence is not reached
# elapsed: 41.644 sec - mean acc.: 0.9845 +/- 2 * 0.006
# iter: 3/330 +-convergence is not reached
# elapsed: 59.386 sec - best acc.: 0.9845 at {'kernel_params': {'sigma': 0.12589254117941673, 'gamma': 0.00019952623149688788}, 'sigma_n': 0}
# ...
# ...
# ...
# iter: 328/330 ++convergence is not reached
# elapsed: 7163.06 sec - mean acc.: 0.8219 +/- 2 * 0.129
# iter: 329/330 +-convergence is not reached
# elapsed: 7184.70 sec - best acc.: 0.9899 at {'kernel_params': {'sigma': 0.42169650342858211, 'gamma': 0.00085113803820237679}, 'sigma_n': 0}
# iter: 330/330 ++convergence is not reached
# elapsed: 7208.78 sec - mean acc.: 0.8219 +/- 2 * 0.129
In [ ]:
df = grid_cv.to_df()
df.to_excel('cv_results/gp_nn_full.xlsx')
df.sort_values(by='mean_score', ascending=False).to_excel('cv_results/gp_nn_best.xlsx')
In [ ]:
X = np.load('data/rbm_train.npy')
_, y = load_mnist('train', 'data/')
tts = TrainTestSplitter(shuffle=True, random_seed=1337)
indices, _ = tts.split(y, train_ratio=1100./60000., stratify=True)
X = X[indices]
y = y[indices]
y = one_hot(y)
sigma_n = [0.]
sigma_f = [0.1]
length_scale = np.logspace(-1., 2., 13)
# gamma = np.logspace(-3.7, -3., 11)
gamma = np.logspace(-5., -0., 19)
param_grid = [{'sigma_n': sigma_n,
'kernel_params': [dict(sigma=sigma, gamma=gamma_) for gamma_ in gamma]} for sigma in sigma_f]
grid_cv = GridSearchCV(model=GPClassifier(algorithm='cg',
random_seed=1337,
max_iter=200,
tol=1e-8,
cg_tol=1e-7,
n_samples=1500),
param_grid=param_grid,
train_test_splitter_params=dict(shuffle=True, random_seed=1337),
n_splits=2,
refit=True,
verbose=True)
print grid_cv.number_of_combinations()
grid_cv.fit(X, y)
# [*] best accuracy only ~0.683 -- much worse than with raw pixels or NN features
In [ ]:
df = grid_cv.to_df()
df.to_excel('cv_results/gp_rbm_full.xlsx')
df.sort_values(by='mean_score', ascending=False).to_excel('cv_results/gp_rbm_best.xlsx')
In [2]:
_, y_test = load_mnist('test', 'data/')
# knn_pred = np.load('data/knn_pred.npy')
# nn_pred = unhot(np.load('data/nn_pred.npy'))
# logreg_pred = unhot(np.load('data/logreg_pred.npy'))
gp_pred = unhot(np.load('data/gp_pred.npy'))
In [3]:
# C = confusion_matrix(y_test, knn_pred)
# ax = plot_confusion_matrix(C)
# plt.title("Confusion matrix for k-NN model", fontsize=18)
# plt.savefig('confusion_matrix_knn.png', dpi=144)
# C = confusion_matrix(y_test, nn_pred)
# ax = plot_confusion_matrix(C)
# plt.title("Confusion matrix for NN model", fontsize=18)
# plt.savefig('confusion_matrix_nn.png', dpi=144)
# C = confusion_matrix(y_test, logreg_pred)
# ax = plot_confusion_matrix(C)
# plt.title("Confusion matrix for LogReg model", fontsize=18)
# plt.savefig('confusion_matrix_logreg.png', dpi=144)
C = confusion_matrix(y_test, gp_pred)
ax = plot_confusion_matrix(C)
plt.title("Confusion matrix for GP model", fontsize=18)
plt.savefig('confusion_matrix_gp.png', dpi=144)
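The overall test accuracy can also be read straight off the confusion matrix: assuming C holds raw counts with true labels along one axis and predictions along the other, accuracy is the trace divided by the total count.
In [ ]:
# overall test accuracy recovered from the confusion matrix (assuming raw counts in C)
print np.trace(C) / float(C.sum())
# equivalently: print accuracy_score(y_test, gp_pred)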