notebook.community

Edit and run



In [1]:

    
%matplotlib inline
from __future__ import division, print_function
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
import numpy as np
import time



In [2]:

    
# Directory holding the gzipped MNIST idx archives (the "Extracting ..." lines
# printed by this cell show the files being read from it).
mnist_dir = '../dat'
# `dat` exposes `.train` and `.validation` splits, each with `.images` and
# `.labels`; those are the only attributes the following cells read.
dat = read_data_sets(mnist_dir)









    



Extracting ../dat/train-images-idx3-ubyte.gz
Extracting ../dat/train-labels-idx1-ubyte.gz
Extracting ../dat/t10k-images-idx3-ubyte.gz
Extracting ../dat/t10k-labels-idx1-ubyte.gz



In [3]:

    
# Semi-supervised setup: keep `train_size` labelled examples (stratified by
# digit) and treat every other training example as an unlabelled pool.
train_size = 100
sss = StratifiedShuffleSplit(
    dat.train.labels,
    n_iter=1,
    train_size=train_size,
    test_size=len(dat.train.labels) - train_size,
    random_state=1234)

# n_iter=1, so the splitter yields exactly one (labelled, unlabelled) index pair.
i_tr, i_unlab = next(iter(sss))

Xtr, ytr = dat.train.images[i_tr], dat.train.labels[i_tr]
# The true labels of the pool (dat.train.labels[i_unlab]) are deliberately not
# kept as `yunl`; `yunl` is filled with pseudo-labels later on.
Xunl = dat.train.images[i_unlab]

# Held-out validation split used to score every model below.
Xval, yval = dat.validation.images, dat.validation.labels



In [4]:

    
from sklearn.linear_model import LogisticRegression



In [26]:

    
# Self-training (pseudo-labelling) with logistic regression.
#
# The confidence threshold is lowered step by step. At every step the model is
# refitted, the whole unlabelled pool is scored, and each sample that clears
# the threshold and has no pseudo-label yet receives one. A pseudo-label is
# never revised once set. The final threshold of 0 labels whatever is left,
# so `yunl` is fully populated when the loop ends.

# initialise
yunl = np.zeros((Xunl.shape[0],), dtype=int)                 # pseudo-labels for the pool
lr = LogisticRegression()
new_confident = np.zeros((Xunl.shape[0],), dtype=bool)       # labelled in the latest step
already_confident = np.zeros((Xunl.shape[0],), dtype=bool)   # labelled in any step so far
verbose = True

# set and train on labels at descending confidence threshold
for thres in [.95, .9, .8, .7, .6, .5, .4, .3, .2, .1, 0]:
    # First pass: `new_confident` is all False, so this fits on (Xtr, ytr) only.
    # NOTE(review): later passes add only the samples labelled in the previous
    # step, not everything in `already_confident`, so earlier (higher
    # confidence) pseudo-labels are dropped from training again. Confirm this
    # is intended; conventional self-training would accumulate them.
    lr.fit(np.vstack((Xtr, Xunl[new_confident])), np.hstack((ytr, yunl[new_confident])))
    probs = lr.predict_proba(Xunl)
    new_confident = (probs.max(axis=1) >= thres) & ~ already_confident
    # The columns of `probs` are ordered like `lr.classes_`; translate the
    # winning column index into the actual class label instead of assuming
    # that index == label.
    yunl[new_confident] = lr.classes_[probs[new_confident].argmax(axis=1)]
    already_confident = already_confident | new_confident
    if verbose:
        print('trained with threshold: {}, now confident of {} of the dataset'.format(thres, already_confident.mean()))









    



trained with threshold: 0.95, now confident of 0.0555555555556 of the dataset
trained with threshold: 0.9, now confident of 0.363369763206 of the dataset
trained with threshold: 0.8, now confident of 0.659508196721 of the dataset
trained with threshold: 0.7, now confident of 0.778178506375 of the dataset
trained with threshold: 0.6, now confident of 0.875792349727 of the dataset
trained with threshold: 0.5, now confident of 0.983479052823 of the dataset
trained with threshold: 0.4, now confident of 0.999344262295 of the dataset
trained with threshold: 0.3, now confident of 0.999981785064 of the dataset
trained with threshold: 0.2, now confident of 1.0 of the dataset
trained with threshold: 0.1, now confident of 1.0 of the dataset
trained with threshold: 0, now confident of 1.0 of the dataset



In [40]:

    
# Supervised baseline: refit `lr` in place on the labelled examples only and
# report its accuracy on the validation split (fit() returns the estimator).
lr.fit(Xtr, ytr).score(Xval, yval)









    Out[40]:





0.72840000000000005



In [44]:

    
# Fresh classifier trained on the labelled examples plus the whole pool with
# its pseudo-labels `yunl`, then scored on the validation split.
clf = LogisticRegression().fit(
    np.concatenate((Xtr, Xunl), axis=0),
    np.concatenate((ytr, yunl)))
clf.score(Xval, yval)









    Out[44]:





0.76300000000000001



In [48]:

    
# Fraction of pseudo-labels that agree with the withheld true labels of the pool.
np.mean(yunl == dat.train.labels[i_unlab])









    Out[48]:





0.75788706739526412

Try with SVM



In [49]:

    
from sklearn.svm import SVC



In [51]:

    
# initialise
yunl = np.zeros((Xunl.shape[0],), dtype=int)
svm = SVC(kernel='rbf')
new_confident = np.zeros((Xunl.shape[0],), dtype=bool)
already_confident = np.zeros((Xunl.shape[0],), dtype=bool)
verbose = True

# set and train on labels at descending confidence threshold
for thres in [.95, .9, .8, .7, .6, .5, .4, .3, .2, .1, 0]:
    svm.fit(np.vstack((Xtr, Xunl[new_confident])), np.hstack((ytr, yunl[new_confident])))
    probs = lr.predict_proba(Xunl)
    new_confident = (probs.max(axis=1) >= thres) & ~ already_confident
    yunl[new_confident] = probs[new_confident].argmax(axis=1)
    already_confident = already_confident | new_confident
    if verbose:
        print('trained with threshold: {}, now confident of {} of the dataset'.format(thres, already_confident.mean()))









    



trained with threshold: 0.95, now confident of 0.0555555555556 of the dataset
trained with threshold: 0.9, now confident of 0.145974499089 of the dataset
trained with threshold: 0.8, now confident of 0.314335154827 of the dataset
trained with threshold: 0.7, now confident of 0.461111111111 of the dataset
trained with threshold: 0.6, now confident of 0.599253187614 of the dataset
trained with threshold: 0.5, now confident of 0.747559198543 of the dataset
trained with threshold: 0.4, now confident of 0.892131147541 of the dataset
trained with threshold: 0.3, now confident of 0.976575591985 of the dataset
trained with threshold: 0.2, now confident of 0.999398907104 of the dataset
trained with threshold: 0.1, now confident of 1.0 of the dataset
trained with threshold: 0, now confident of 1.0 of the dataset



In [52]:

    
# Validation accuracy of `svm` exactly as the self-training loop left it,
# i.e. as fitted in the loop's last iteration.
svm.score(X=Xval, y=yval)









    Out[52]:





0.20499999999999999



In [53]:

    
# Supervised baseline: refit `svm` in place on the labelled examples only and
# report its accuracy on the validation split (fit() returns the estimator).
svm.fit(Xtr, ytr).score(Xval, yval)









    Out[53]:





0.34960000000000002



In [ ]:

    
# Refit `svm` on the labelled examples plus the whole pool with its
# pseudo-labels `yunl`, then score it on the validation split.
svm.fit(
    np.concatenate((Xtr, Xunl), axis=0),
    np.concatenate((ytr, yunl)))
svm.score(Xval, yval)