In [ ]:
import numpy as np
import copy
import bayes_logistic as bl
from sklearn import datasets
In [ ]:
# Digit we want to classify: the labels are turned into a binary target that
# is 1 for samples of this digit and 0 for every other digit.
DIGIT = 1
In [ ]:
# Prepare the handwritten-digit image classification data (scikit-learn's
# bundled `load_digits` set). The samples are divided, in their stored order,
# into training, cross validation (cv) and test sets.
TRAINING_PERCENTAGE = 0.6
CV_PERCENTAGE = 0.2
TEST_PERCENTAGE = 0.2

digits = datasets.load_digits()
n_samples, n_features = digits.data.shape

# Feature matrix with a leading column of ones that acts as the bias term.
X = np.column_stack((np.ones(n_samples), digits.data))

# Binary labels: 1 where the sample is DIGIT, 0 otherwise. A new array is
# built (keeping the integer dtype of the original targets) so that
# digits.target itself is not overwritten through an alias.
y = (digits.target == DIGIT).astype(digits.target.dtype)

# Split sizes. int() truncates, so the three counts may sum to slightly less
# than n_samples.
training_cnt = int(n_samples * TRAINING_PERCENTAGE)
cv_cnt = int(n_samples * CV_PERCENTAGE)
test_cnt = int(n_samples * TEST_PERCENTAGE)
cv_end = training_cnt + cv_cnt

training_X = X[:training_cnt, :]
training_y = y[:training_cnt]

cv_X = X[training_cnt:cv_end, :]
cv_y = y[training_cnt:cv_end]

# The test set takes every remaining row, so it can hold a few more samples
# than test_cnt when the truncated counts do not add up to n_samples.
test_X = X[cv_end:, :]
test_y = y[cv_end:]
In [ ]:
# Train the model.
# The posterior of each batch becomes the prior of the next one, so the
# training set can be consumed in GD_BATCH_SIZE-sized chunks over
# ITERATION_CNT passes.
GD_BATCH_SIZE = training_cnt
ITERATION_CNT = 1
w = training_X.shape[1]           # number of weights (features + bias)
w_prior = np.zeros(w)             # prior mean of the weights
H_prior = np.eye(w) * 0.001       # prior Hessian, as expected by bayes_logistic
for _ in range(ITERATION_CNT):
    for start in range(0, training_cnt, GD_BATCH_SIZE):
        # Slice by [start:stop]. The last batch is clipped to the end of the
        # training set. (Slicing by [start:batch_size] only works for the
        # very first batch.)
        stop = min(start + GD_BATCH_SIZE, training_cnt)
        w_posterior, H_posterior = bl.fit_bayes_logistic(
            training_y[start:stop],
            training_X[start:stop, :],
            w_prior, H_prior, solver='BFGS')
        w_prior = copy.copy(w_posterior)
        H_prior = copy.copy(H_posterior)

# Perform cross validation.
# A sample is predicted positive when its probability exceeds the threshold;
# accuracy is the percentage of samples whose prediction matches the label.
# NOTE(review): the test cell uses a different threshold (0.5) - confirm
# whether the two are meant to differ.
CV_THRESHOLD = 0.9
cv_probs = np.asarray(bl.bayes_logistic_prob(cv_X, w_prior, H_prior)).ravel()
cv_predicted_positive = cv_probs > CV_THRESHOLD
cv_is_positive = cv_y == 1
prediction_cnt = int(np.sum(cv_predicted_positive == cv_is_positive))
y_cnt = len(cv_y)
# Guard against an empty cross validation set.
prediction_accuracy = (100.0 * prediction_cnt) / y_cnt if y_cnt else 0.0
print("Prediction Accuracy for cross validation set %.02f" % prediction_accuracy)
In [ ]:
# Perform test.
# Score the held-out test set with the trained posterior (w_prior, H_prior).
# The test probabilities and test labels are compared here - not the cross
# validation ones - and every row of the test set is evaluated.
# NOTE(review): the cross validation cell uses a different threshold (0.9) -
# confirm whether the two are meant to differ.
TEST_THRESHOLD = 0.5
test_probs = np.asarray(bl.bayes_logistic_prob(test_X, w_prior, H_prior)).ravel()
test_predicted_positive = test_probs > TEST_THRESHOLD
test_is_positive = test_y == 1
# Accuracy is the percentage of samples whose prediction matches the label.
prediction_cnt = int(np.sum(test_predicted_positive == test_is_positive))
y_cnt = len(test_y)
# Guard against an empty test set.
prediction_accuracy = (100.0 * prediction_cnt) / y_cnt if y_cnt else 0.0
print("Prediction Accuracy for test set %.02f" % prediction_accuracy)