In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.gridspec as gridspec
import pickle
import gzip
In [2]:
!wget http://www.cs.colostate.edu/~anderson/cs480/notebooks/nn4.tar
!tar xvf nn4.tar
In [3]:
import neuralnetworksbylayer as nn
Get the MNIST data file (`mnist.pkl.gz`) from the DeepLearning Tutorial, or download it by running the next code cell.
In [5]:
!wget http://www.cs.colostate.edu/~anderson/cs480/notebooks/mnist.pkl.gz
In [4]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# mnist.pkl.gz is downloaded over plain http, so only load it if you trust
# the source.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

# Each set is indexed as (images, labels). Every image is reshaped to
# 28 x 28 x 1 (rows, columns, channels). One reshape of the whole image array
# gives the same (nSamples, 28, 28, 1) result as stacking individually
# reshaped images, without creating one temporary array per image.
Xorig = np.asarray(train_set[0]).reshape((-1, 28, 28, 1))
Torig = np.array(train_set[1]).reshape((-1, 1))   # labels as a column vector
Xtest = np.asarray(test_set[0]).reshape((-1, 28, 28, 1))
Ttest = np.array(test_set[1]).reshape((-1, 1))
Xorig.shape, Torig.shape, Xtest.shape, Ttest.shape
Out[4]:
In [5]:
# Show the first 100 training images in a 10 x 10 grid, each titled with its
# target label. Pixel values are negated so the gray colormap is inverted.
nShown = 100
plt.figure(figsize=(10, 10))
for i in range(nShown):
    digitImage = -Xorig[i, :].reshape((28, 28))
    digitLabel = str(Torig[i][0])
    plt.subplot(10, 10, i + 1)
    plt.imshow(digitImage, interpolation='nearest', cmap='gray')
    plt.axis('off')
    plt.title(digitLabel)
plt.tight_layout()
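To try training with just 100 samples of each digit, enable the subsampling branch in the next cell. As written, that branch is disabled and the full training set is used.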
In [6]:
# Choose the training data: either a small, class-balanced subset or the
# full training set.
trainOnSubset = False   # set to True to train on nEach samples of each digit
if trainOnSubset:
    nEach = 100
    useThese = []
    for digit in range(10):
        # Row indices of the first nEach samples labelled with this digit.
        useThese += np.where(Torig == digit)[0][:nEach].tolist()
    useThese = np.array(useThese)
    np.random.shuffle(useThese)   # mix the digits; they were gathered class by class
    X = Xorig[useThese, :]
    T = Torig[useThese, :]
    del Xorig   # to save memory
    del Torig   # was `del Yorig`, a name that is never defined (NameError)
else:
    X = Xorig
    T = Torig
X.shape, T.shape
Out[6]:
Make a neural network whose first layer is a convolutional layer of 20 units, with each unit learning a 5x5 matrix of weights to be applied to all 5x5 patches in the image with a stride of 1. As configured in the next cell, the second layer is also convolutional: 10 units, each applied to 10x10 patches of the first layer's output with a stride of 2. The third layer is the usual fully-connected layer with 10 units. The final layer is a multinomial output layer with one unit per class.
In [7]:
# Build the convolutional classifier. The input is one imageSize x imageSize
# image with nChannels channels; the last entry of nUnits is the number of
# distinct target classes found in T.
imageSize = 28
nChannels = 1
nnet = nn.NeuralNetworkConvolutionalClassifier(
    inputSize=[imageSize, imageSize],
    nUnits=[nChannels, 20, 10, 10, len(np.unique(T))],
    windowStrides=[[1, 1], [2, 2]],   # one stride pair per windowed layer
    windowSizes=[[5, 5], [10, 10]])   # one window size per windowed layer
To avoid memory errors, train in minibatches.
Applying `nnet.use` to a large matrix such as `X` or `Xtest` causes a memory error. So we will use the following function, which calls `nnet.use` on one minibatch at a time and collects the outputs.
In [8]:
def useMinibatch(nnet,X,minibatchSize):
    """Apply nnet.use to X one minibatch at a time and collect the results.

    Parameters
    ----------
    nnet : object with a `use(X)` method
        `use` must return one value per input row, in a shape that can be
        assigned into an (nRows, 1) slice.
    X : array
        Input samples, one per index along the first axis.
    minibatchSize : int
        Number of samples passed to `nnet.use` per call; must be at least 1.

    Returns
    -------
    numpy array of shape (X.shape[0], 1) holding the output of `nnet.use`
    for every sample, in the order of X.

    Raises
    ------
    ValueError
        If minibatchSize is less than 1. Without this check a negative size
        makes the batch loop empty and silently returns all zeros.
    """
    if minibatchSize < 1:
        raise ValueError('minibatchSize must be a positive integer, got {}'.format(minibatchSize))
    nSamples = X.shape[0]
    output = np.zeros((nSamples, 1))
    for first in range(0, nSamples, minibatchSize):
        # A slice running past the end is clipped, so the last batch may be short.
        justThese = slice(first, first + minibatchSize)
        output[justThese, :] = nnet.use(X[justThese, :])
    return output
In [9]:
def trainMiniBatches(nReps, minibatchIncrement=1000, nIterations=10):
    """Train the global nnet on the global X, T in shuffled minibatches.

    Each repetition shuffles the row order, trains on consecutive minibatches
    of the shuffled rows, then prints the fraction of correctly classified
    samples on the training data (X, T) and the test data (Xtest, Ttest).

    Parameters
    ----------
    nReps : int
        Number of passes over the training data.
    minibatchIncrement : int, optional
        Number of samples per minibatch, used for both training and
        evaluation. Defaults to 1000, the previously hard-coded value.
    nIterations : int, optional
        Passed to nnet.train for every minibatch. Defaults to 10, the
        previously hard-coded value.

    Returns
    -------
    list
        Concatenation of nnet.getErrorTrace() taken after every minibatch.

    Raises
    ------
    ValueError
        If minibatchIncrement is less than 1.
    """
    if minibatchIncrement < 1:
        raise ValueError('minibatchIncrement must be a positive integer, got {}'.format(minibatchIncrement))
    likelihoods = []
    nSamples = X.shape[0]
    for reps in range(nReps):
        # New random row order for every pass.
        roworder = np.arange(nSamples)
        np.random.shuffle(roworder)
        for first in range(0, nSamples, minibatchIncrement):
            rows = roworder[first:first + minibatchIncrement]
            nnet.train(X[rows, :], T[rows, :], nIterations=nIterations, verbose=False)
            likelihoods += nnet.getErrorTrace()
            # Report the real end of the batch; the last one may be short.
            last = min(first + minibatchIncrement, nSamples)
            print('Minibatch from',first,'to',last,'Final likel',likelihoods[-1])
        # Evaluate in minibatches as well, to keep memory use bounded.
        p = useMinibatch(nnet,X,minibatchIncrement)
        ptest = useMinibatch(nnet,Xtest,minibatchIncrement)
        print("Rep {}. Fraction correct: Training {} Testing {}".format(reps,np.sum(p==T)/len(T),
                                                                       np.sum(ptest==Ttest)/len(Ttest)))
    return likelihoods
In [10]:
import time
In [11]:
# Run two passes of minibatch training and report the wall-clock time taken.
starttime = time.time()
likelihoods = trainMiniBatches(2)
elapsedSeconds = time.time() - starttime
print(elapsedSeconds,'seconds')
In [12]:
# Classify the whole test set in minibatches of 200, then print the number
# and the fraction of misclassified test samples.
ptest = useMinibatch(nnet, Xtest, 200)
isWrong = ptest != Ttest
nWrong = np.sum(isWrong)
print(nWrong, nWrong/len(Ttest))
In [13]:
# Summary figure on a 14 x 5 grid:
#   row 0       error trace returned by nnet.getErrorTrace()
#   rows 1-5    first 25 test digits, titled with the predicted class
#   rows 6-9    weights of up to 20 first-layer units
#   rows 10-13  first-layer outputs of those units for the first test digit
plt.figure(figsize=(12,15))
gs = gridspec.GridSpec(14, 5)

plt.subplot(gs[0, 0:5])
plt.plot(nnet.getErrorTrace())

ndigits = 25
for i in range(ndigits):
    plt.subplot(gs[1 + i // 5, i % 5])
    plt.imshow(-Xtest[i, :].reshape((imageSize, imageSize)), interpolation='nearest', cmap=plt.cm.gray)
    plt.title('P=' + str(ptest[i][0]))
    plt.axis('off')

cvlay = nnet.layers[0]
cw = cvlay.W[1:,:]   # skip the first row of W (presumably the bias weights -- confirm)
nh = min(cvlay.nUnits,20)   # only 4 grid rows x 5 columns are reserved per section
for i in range(nh):
    plt.subplot(gs[6 + i // 5, i % 5])
    plt.imshow(cw[:,i].reshape(cvlay.windowSizes), interpolation='nearest', cmap=plt.cm.gray)
    plt.axis('off')

# Run the displayed digits through the network first, so that cvlay.Y holds
# their outputs rather than those of whichever batch was processed last.
nnet.use(Xtest[:ndigits, :])
# Take the output map size and unit count from the layer instead of
# hard-coding 24 x 24 x 20.
wrow, wcol = cvlay.nWindows
cy = cvlay.Y.reshape((-1, wrow, wcol, cvlay.nUnits))
for i in range(nh):
    plt.subplot(gs[10 + i // 5, i % 5])
    plt.imshow(cy[0,:,:,i], interpolation='nearest', cmap=plt.cm.gray)
    plt.axis('off')

plt.tight_layout()
In [14]:
def drawFirstLayerOutputs(nnet,X):
    """Plot the output map of every first-layer unit for each sample in X.

    Runs nnet.use(X), then reads nnet.layers[0].Y and draws one image per
    (sample, unit) pair in a new figure. Units are laid out ten per row, and
    each sample gets as many rows as its units need.

    Parameters
    ----------
    nnet : network whose layers[0] exposes nUnits, nWindows and Y
    X : array of input samples, one per index along the first axis
    """
    unitsPerRow = 10
    nnet.use(X)   # forward pass, so the layer's Y holds the outputs for X
    nSamples = X.shape[0]
    cvlay = nnet.layers[0]
    nUnits = cvlay.nUnits
    wrow,wcol = cvlay.nWindows
    y = cvlay.Y.reshape((-1,wrow,wcol,nUnits))
    # Ceiling division: 2 rows for the 20 units used here, more when the
    # layer has more units (the grid used to be fixed at 2 rows).
    rowsPerSample = max(1, -(-nUnits // unitsPerRow))
    plt.figure(figsize=(12, rowsPerSample*nSamples))
    gs = gridspec.GridSpec(nSamples*rowsPerSample, unitsPerRow)
    for i in range(nSamples):
        for h in range(nUnits):
            plt.subplot(gs[i*rowsPerSample + h // unitsPerRow, h % unitsPerRow])
            plt.imshow(-y[i,:,:,h], interpolation='nearest',cmap='gray')
            plt.axis('off')
In [15]:
# First-layer outputs for five hand-picked test images.
sampleRows = [10, 5, 1, 18, 4]
drawFirstLayerOutputs(nnet, Xtest[sampleRows, :])
In [16]:
# First-layer outputs for five more hand-picked test images.
sampleRows = [8, 11, 0, 61, 20]
drawFirstLayerOutputs(nnet, Xtest[sampleRows, :])
In [17]:
def drawSecondLayerOutputs(nnet,X):
    """Plot the output map of every second-layer unit for each sample in X.

    Runs nnet.use(X), then reads nnet.layers[1].Y and draws one image per
    (sample, unit) pair in a new figure. Units are laid out ten per row, and
    each sample gets as many rows as its units need.

    Parameters
    ----------
    nnet : network whose layers[1] exposes nUnits, nWindows and Y
    X : array of input samples, one per index along the first axis
    """
    unitsPerRow = 10
    nnet.use(X)   # forward pass, so the layer's Y holds the outputs for X
    nSamples = X.shape[0]
    cvlay = nnet.layers[1]
    nUnits = cvlay.nUnits
    wrow,wcol = cvlay.nWindows
    y = cvlay.Y.reshape((-1,wrow,wcol,nUnits))
    # Ceiling division: 1 row for the 10 units used here, more when the
    # layer has more units (the grid used to be fixed at 10 columns, 1 row).
    rowsPerSample = max(1, -(-nUnits // unitsPerRow))
    plt.figure(figsize=(12, rowsPerSample*nSamples))
    gs = gridspec.GridSpec(nSamples*rowsPerSample, unitsPerRow)
    for i in range(nSamples):
        for h in range(nUnits):
            plt.subplot(gs[i*rowsPerSample + h // unitsPerRow, h % unitsPerRow])
            plt.imshow(-y[i,:,:,h], interpolation='nearest',cmap='gray')
            plt.axis('off')
In [18]:
# Second-layer outputs for the same ten test images shown in the two
# first-layer figures.
sampleRows = [10, 5, 1, 18, 4, 8, 11, 0, 61, 20]
drawSecondLayerOutputs(nnet, Xtest[sampleRows])
In [ ]: