In [1]:
%matplotlib inline
from __future__ import division, print_function
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
import numpy as np
import time
In [2]:
# Location of the MNIST files, relative to this notebook.
mnist_dir = '../dat'
# `dat` exposes the `.train` and `.validation` splits used in later cells.
dat = read_data_sets(train_dir=mnist_dir)
In [3]:
# Carve a small, class-stratified labelled subset out of the MNIST training
# split; every remaining training row is treated as unlabelled.
train_size = 100
sss = StratifiedShuffleSplit(
    dat.train.labels,
    n_iter=1,
    train_size=train_size,
    test_size=dat.train.labels.shape[0] - train_size,
    random_state=1234,
)
# n_iter=1, so the splitter yields exactly one (train, test) index pair.
i_tr, i_unlab = next(iter(sss))

# Labelled rows.
Xtr, ytr = dat.train.images[i_tr], dat.train.labels[i_tr]

# Unlabelled rows. Their true labels exist at dat.train.labels[i_unlab] but are
# not bound to `yunl` here; later cells fill `yunl` with pseudo-labels.
Xunl = dat.train.images[i_unlab]

# Held-out validation data used to score every model below.
Xval, yval = dat.validation.images, dat.validation.labels
In [4]:
from sklearn.linear_model import LogisticRegression
In [26]:
# Self-training with logistic regression.
#
# Starting from the labelled set (Xtr, ytr), repeatedly fit the model, score
# every unlabelled row, and freeze a pseudo-label for each row whose highest
# class probability reaches the current threshold. Thresholds descend, so the
# most confident rows are labelled first; the final threshold of 0 accepts all
# rows that are still unlabelled, so `yunl` is fully populated after the loop.
#
# Names left behind for later cells: `yunl` (pseudo-labels), `lr` (model from
# the last round), `already_confident` / `new_confident` (boolean row masks).

# initialise
yunl = np.zeros((Xunl.shape[0],), dtype=int)
lr = LogisticRegression()
new_confident = np.zeros((Xunl.shape[0],), dtype=bool)
already_confident = np.zeros((Xunl.shape[0],), dtype=bool)
verbose = True

# set and train on labels at descending confidence threshold
for thres in [.95, .9, .8, .7, .6, .5, .4, .3, .2, .1, 0]:
    # Fit on the labelled rows plus EVERY pseudo-label accepted so far.
    # Indexing with `new_confident` here would keep only the rows accepted in
    # the previous round and drop the earlier, higher-confidence ones.
    lr.fit(np.vstack((Xtr, Xunl[already_confident])),
           np.hstack((ytr, yunl[already_confident])))
    probs = lr.predict_proba(Xunl)

    # Rows that clear the threshold now and were not labelled in an earlier round.
    new_confident = (probs.max(axis=1) >= thres) & ~already_confident

    # predict_proba columns are ordered like lr.classes_, so translate the
    # winning column index back into an actual class label.
    yunl[new_confident] = lr.classes_[probs[new_confident].argmax(axis=1)]
    already_confident = already_confident | new_confident

    if verbose:
        print('trained with threshold: {}, now confident of {} of the dataset'.format(thres, already_confident.mean()))
In [40]:
# Baseline: refit `lr` on the labelled rows only and report validation accuracy.
# Note this overwrites whatever `lr` was fitted on before this cell ran.
lr.fit(Xtr, ytr).score(Xval, yval)
Out[40]:
In [44]:
# Fresh logistic regression trained on the labelled rows together with all
# unlabelled rows and their current pseudo-labels in `yunl`.
clf = LogisticRegression().fit(
    np.concatenate((Xtr, Xunl), axis=0),
    np.concatenate((ytr, yunl)),
)
clf.score(Xval, yval)
Out[44]:
In [48]:
# Fraction of pseudo-labels that agree with the true labels of the unlabelled rows.
np.mean(yunl == dat.train.labels[i_unlab])
Out[48]:
In [49]:
from sklearn.svm import SVC
In [51]:
# Self-training with an RBF-kernel SVM.
#
# Same procedure as the logistic-regression loop: fit, score every unlabelled
# row, and freeze a pseudo-label for each row whose highest class probability
# reaches the current (descending) threshold. The final threshold of 0 accepts
# all rows that are still unlabelled, so `yunl` is fully populated afterwards.
#
# Names left behind for later cells: `yunl` (pseudo-labels), `svm` (model from
# the last round), `already_confident` / `new_confident` (boolean row masks).

# initialise
yunl = np.zeros((Xunl.shape[0],), dtype=int)
# probability=True is required for SVC to provide predict_proba; random_state
# fixes the shuffling used by the probability calibration (see the SVC docs).
svm = SVC(kernel='rbf', probability=True, random_state=1234)
new_confident = np.zeros((Xunl.shape[0],), dtype=bool)
already_confident = np.zeros((Xunl.shape[0],), dtype=bool)
verbose = True

# set and train on labels at descending confidence threshold
for thres in [.95, .9, .8, .7, .6, .5, .4, .3, .2, .1, 0]:
    # Fit on the labelled rows plus EVERY pseudo-label accepted so far, not
    # just the rows accepted in the previous round.
    svm.fit(np.vstack((Xtr, Xunl[already_confident])),
            np.hstack((ytr, yunl[already_confident])))

    # Score with the SVM that was just fitted. Using `lr` here would take the
    # probabilities from a different model and ignore the SVM entirely.
    probs = svm.predict_proba(Xunl)

    # Rows that clear the threshold now and were not labelled in an earlier round.
    new_confident = (probs.max(axis=1) >= thres) & ~already_confident

    # predict_proba columns are ordered like svm.classes_, so translate the
    # winning column index back into an actual class label.
    yunl[new_confident] = svm.classes_[probs[new_confident].argmax(axis=1)]
    already_confident = already_confident | new_confident

    if verbose:
        print('trained with threshold: {}, now confident of {} of the dataset'.format(thres, already_confident.mean()))
In [52]:
# Validation accuracy of `svm` as it was left by the most recent fit.
svm.score(X=Xval, y=yval)
Out[52]:
In [53]:
# Baseline: refit `svm` on the labelled rows only and report validation accuracy.
# Note this overwrites whatever `svm` was fitted on before this cell ran.
svm.fit(Xtr, ytr).score(Xval, yval)
Out[53]:
In [ ]:
# Refit `svm` on the labelled rows together with all unlabelled rows and their
# current pseudo-labels in `yunl`, then report validation accuracy.
svm.fit(
    np.concatenate((Xtr, Xunl), axis=0),
    np.concatenate((ytr, yunl)),
).score(Xval, yval)