Convolutional Neural Networks

Standard LeNet5 with PyTorch

Xavier Bresson, Sept. 2017

Implementation of the original LeNet5 convolutional neural network:
"Gradient-based learning applied to document recognition"
Y. LeCun, L. Bottou, Y. Bengio, P. Haffner
Proceedings of the IEEE 86 (11), 2278-2324, 1998


In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb  # debugger: insert pdb.set_trace() to set a breakpoint
import collections
import time
import numpy as np

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

if torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    torch.cuda.manual_seed(1)
else:
    print('cuda not available')
    dtypeFloat = torch.FloatTensor
    dtypeLong = torch.LongTensor
    torch.manual_seed(1)


cuda available

MNIST


In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('datasets', one_hot=False) # load data in folder datasets/

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)


Extracting datasets/train-images-idx3-ubyte.gz
Extracting datasets/train-labels-idx1-ubyte.gz
Extracting datasets/t10k-images-idx3-ubyte.gz
Extracting datasets/t10k-labels-idx1-ubyte.gz
(55000, 784)
(55000,)
(5000, 784)
(5000,)
(10000, 784)
(10000,)
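
Each row of the data matrices is a flattened 28x28 grayscale image with pixel values scaled to [0, 1]; the labels are integer class ids (one_hot=False). A quick sanity check, as a minimal sketch assuming the loader above succeeded:

In [ ]:
img = train_data[0].reshape(28, 28)   # back to a 2D image
print(img.shape, img.min(), img.max(), train_labels[0])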

ConvNet LeNet5

Layers: CL32-MP4-CL64-MP4-FC512-FC10 (each MP4 is a 2x2 max pooling, which divides the number of pixels by 4)
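
A quick walk-through of the tensor shapes, as a sketch (the numbers follow from the class definition below, where both convolutions use 5x5 kernels with padding 2):

In [ ]:
# input                   1 x 28 x 28
# conv1 (5x5, pad 2)  ->  32 x 28 x 28
# maxpool 2x2         ->  32 x 14 x 14
# conv2 (5x5, pad 2)  ->  64 x 14 x 14
# maxpool 2x2         ->  64 x  7 x  7
# flatten             ->  64*7*7 input features for FC1
print(64 * (28 // 4)**2)   # FC1Fin = 3136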


In [3]:
# class definition
class ConvNet_LeNet5(nn.Module):
    
    def __init__(self, net_parameters):
        
        print('ConvNet: LeNet5\n')
        
        super(ConvNet_LeNet5, self).__init__()
        
        Nx, Ny, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F = net_parameters
        FC1Fin = CL2_F*(Nx//4)**2
        
        # CL1: first convolutional layer (Xavier-style weight initialization)
        self.conv1 = nn.Conv2d(1, CL1_F, CL1_K, padding=(2, 2))
        Fin = CL1_K**2; Fout = CL1_F;
        scale = np.sqrt( 2.0/ (Fin+Fout) )
        self.conv1.weight.data.uniform_(-scale, scale)
        self.conv1.bias.data.fill_(0.0)
        
        # CL2: second convolutional layer
        self.conv2 = nn.Conv2d(CL1_F, CL2_F, CL2_K, padding=(2, 2))
        Fin = CL1_F*CL2_K**2; Fout = CL2_F;
        scale = np.sqrt( 2.0/ (Fin+Fout) )
        self.conv2.weight.data.uniform_(-scale, scale)
        self.conv2.bias.data.fill_(0.0)
        
        # FC1
        self.fc1 = nn.Linear(FC1Fin, FC1_F) 
        Fin = FC1Fin; Fout = FC1_F;
        scale = np.sqrt( 2.0/ (Fin+Fout) )
        self.fc1.weight.data.uniform_(-scale, scale)
        self.fc1.bias.data.fill_(0.0)
        self.FC1Fin = FC1Fin
        
        # FC2
        self.fc2 = nn.Linear(FC1_F, FC2_F)
        Fin = FC1_F; Fout = FC2_F;
        scale = np.sqrt( 2.0/ (Fin+Fout) )
        self.fc2.weight.data.uniform_(-scale, scale)
        self.fc2.bias.data.fill_(0.0)
        
        # max pooling
        self.pool = nn.MaxPool2d(2, 2)
         
        
    def forward(self, x, d):
        
        # CL1
        x = self.conv1(x)    
        x = F.relu(x)
        x = self.pool(x)

        # CL2
        x = self.conv2(x)    
        x = F.relu(x)
        x = self.pool(x)

        # FC1
        x = x.permute(0,3,2,1).contiguous() # reorder axes so the flattening below matches TensorFlow's NHWC layout
        x = x.view(-1, self.FC1Fin)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=d, training=True)  # functional dropout; d=0.5 while training, d=0.0 at test time
        
        # FC2
        x = self.fc2(x)
            
        return x
        
        
    def loss(self, y, y_target, l2_regularization):
    
        loss = nn.CrossEntropyLoss()(y,y_target)

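        # L2 regularization by hand: sum the squares of all parameters
        # (weights and biases alike); 0.5*l2_regularization scales it below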
        l2_loss = 0.0
        for param in self.parameters():
            data = param* param
            l2_loss += data.sum()
           
        loss += 0.5* l2_regularization* l2_loss
            
        return loss
    
    
    def update(self, lr):
                
        update = torch.optim.SGD( self.parameters(), lr=lr, momentum=0.9 )
        
        return update
        
           
    def update_learning_rate(self, optimizer, lr):
   
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        return optimizer

    
    def evaluation(self, y_predicted, test_l):
    
        _, class_predicted = torch.max(y_predicted.data, 1)
        return 100.0* (class_predicted == test_l).sum()/ y_predicted.size(0)
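
A minimal smoke test of the forward pass on random inputs, as a sketch (the hyperparameters match the next cell; the expected output is one score per class for each image):

In [ ]:
tmp_net = ConvNet_LeNet5([28, 28, 32, 5, 64, 5, 512, 10])
x = Variable(torch.randn(4, 1, 28, 28))   # 4 random "images" in NCHW layout
print(tmp_net.forward(x, 0.0).size())     # expected: torch.Size([4, 10])
del tmp_net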

In [4]:
# Delete existing network if exists
try:
    del net
    print('Delete existing network\n')
except NameError:
    print('No existing network to delete\n')



# network parameters
Nx = Ny = 28
CL1_F = 32
CL1_K = 5
CL2_F = 64
CL2_K = 5
FC1_F = 512
FC2_F = 10
net_parameters = [Nx, Ny, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]


# instantiate the network
net = ConvNet_LeNet5(net_parameters)
if torch.cuda.is_available():
    net.cuda()
print(net)


# Weights
L = list(net.parameters())


# learning parameters
learning_rate = 0.05
dropout_value = 0.5
l2_regularization = 5e-4 
batch_size = 100
num_epochs = 20
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)


# Optimizer
global_lr = learning_rate
global_step = 0
decay = 0.95
decay_steps = train_size
lr = learning_rate
optimizer = net.update(lr) 


# loop over epochs
indices = collections.deque()
for epoch in range(num_epochs):  # loop over the dataset multiple times

    # reshuffle 
    indices.extend(np.random.permutation(train_size)) # rand permutation
    
    # reset time
    t_start = time.time()
    
    # loop over mini-batches
    running_loss = 0.0
    running_accuracy = 0
    running_total = 0
    while len(indices) >= batch_size:
        
        # extract batches
        batch_idx = [indices.popleft() for i in range(batch_size)]
        train_x, train_y = train_data[batch_idx,:].T, train_labels[batch_idx].T
        train_x = np.reshape(train_x,[28,28,batch_size])[:,:,:,None]
        train_x = np.transpose(train_x,[2,3,1,0]) # rearrange to (batch, channel, 28, 28), the NCHW layout Conv2d expects
        train_x = Variable( torch.FloatTensor(train_x).type(dtypeFloat) , requires_grad=False) 
        train_y = train_y.astype(np.int64)
        train_y = torch.LongTensor(train_y).type(dtypeLong)
        train_y = Variable( train_y , requires_grad=False) 
            
        # Forward 
        y = net.forward(train_x, dropout_value)
        loss = net.loss(y,train_y,l2_regularization) 
        loss_train = loss.data[0]
        
        # Accuracy
        acc_train = net.evaluation(y,train_y.data)
        
        # backward
        loss.backward()
        
        # Update 
        global_step += batch_size # to update learning rate
        optimizer.step()
        optimizer.zero_grad()
        
        # loss, accuracy
        running_loss += loss_train
        running_accuracy += acc_train
        running_total += 1
        
        # print        
        if not running_total%100: # print every 100 mini-batches
            print('epoch= %d, i= %4d, loss(batch)= %.4f, accuracy(batch)= %.2f' % (epoch+1, running_total, loss_train, acc_train))
          
       
    # print 
    t_stop = time.time() - t_start
    print('epoch= %d, loss(train)= %.3f, accuracy(train)= %.3f, time= %.3f, lr= %.5f' % 
          (epoch+1, running_loss/running_total, running_accuracy/running_total, t_stop, lr))
 

    # update learning rate 
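    # staircase exponential decay: global_step//decay_steps equals the number
    # of completed epochs, so lr = learning_rate * decay**epoch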
    lr = global_lr * pow( decay , float(global_step// decay_steps) )
    optimizer = net.update_learning_rate(optimizer, lr)
    
    
    # Test set
    running_accuracy_test = 0
    running_total_test = 0
    indices_test = collections.deque()
    indices_test.extend(range(test_data.shape[0]))
    t_start_test = time.time()
    while len(indices_test) >= batch_size:
        batch_idx_test = [indices_test.popleft() for i in range(batch_size)]
        test_x, test_y = test_data[batch_idx_test,:].T, test_labels[batch_idx_test].T
        test_x = np.reshape(test_x,[28,28,batch_size])[:,:,:,None]
        test_x = np.transpose(test_x,[2,3,1,0]) # rearrange to (batch, channel, 28, 28), the NCHW layout Conv2d expects
        test_x = Variable( torch.FloatTensor(test_x).type(dtypeFloat) , requires_grad=False) 
        y = net.forward(test_x, 0.0) 
        test_y = test_y.astype(np.int64)
        test_y = torch.LongTensor(test_y).type(dtypeLong)
        test_y = Variable( test_y , requires_grad=False) 
        acc_test = net.evaluation(y,test_y.data)
        running_accuracy_test += acc_test
        running_total_test += 1
    t_stop_test = time.time() - t_start_test
    print('  accuracy(test) = %.3f %%, time= %.3f' % (running_accuracy_test / running_total_test, t_stop_test))


No existing network to delete

ConvNet: LeNet5

ConvNet_LeNet5 (
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (fc1): Linear (3136 -> 512)
  (fc2): Linear (512 -> 10)
  (pool): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
)
num_epochs= 20 , train_size= 55000 , nb_iter= 11000
epoch= 1, i=  100, loss(batch)= 0.2640, accuracy(batch)= 94.00
epoch= 1, i=  200, loss(batch)= 0.2147, accuracy(batch)= 97.00
epoch= 1, i=  300, loss(batch)= 0.2581, accuracy(batch)= 94.00
epoch= 1, i=  400, loss(batch)= 0.1280, accuracy(batch)= 100.00
epoch= 1, i=  500, loss(batch)= 0.2204, accuracy(batch)= 96.00
epoch= 1, loss(train)= 0.305, accuracy(train)= 93.693, time= 7.688, lr= 0.05000
  accuracy(test) = 98.660 %, time= 0.331
epoch= 2, i=  100, loss(batch)= 0.1557, accuracy(batch)= 98.00
epoch= 2, i=  200, loss(batch)= 0.1801, accuracy(batch)= 97.00
epoch= 2, i=  300, loss(batch)= 0.1663, accuracy(batch)= 97.00
epoch= 2, i=  400, loss(batch)= 0.1095, accuracy(batch)= 99.00
epoch= 2, i=  500, loss(batch)= 0.1311, accuracy(batch)= 99.00
epoch= 2, loss(train)= 0.153, accuracy(train)= 98.324, time= 6.966, lr= 0.04750
  accuracy(test) = 98.670 %, time= 0.331
epoch= 3, i=  100, loss(batch)= 0.0953, accuracy(batch)= 100.00
epoch= 3, i=  200, loss(batch)= 0.0909, accuracy(batch)= 100.00
epoch= 3, i=  300, loss(batch)= 0.1151, accuracy(batch)= 98.00
epoch= 3, i=  400, loss(batch)= 0.0906, accuracy(batch)= 100.00
epoch= 3, i=  500, loss(batch)= 0.1811, accuracy(batch)= 96.00
epoch= 3, loss(train)= 0.128, accuracy(train)= 98.700, time= 6.985, lr= 0.04512
  accuracy(test) = 99.020 %, time= 0.331
epoch= 4, i=  100, loss(batch)= 0.0937, accuracy(batch)= 100.00
epoch= 4, i=  200, loss(batch)= 0.1036, accuracy(batch)= 100.00
epoch= 4, i=  300, loss(batch)= 0.1260, accuracy(batch)= 99.00
epoch= 4, i=  400, loss(batch)= 0.1336, accuracy(batch)= 96.00
epoch= 4, i=  500, loss(batch)= 0.1960, accuracy(batch)= 98.00
epoch= 4, loss(train)= 0.110, accuracy(train)= 99.016, time= 6.964, lr= 0.04287
  accuracy(test) = 99.140 %, time= 0.331
epoch= 5, i=  100, loss(batch)= 0.1039, accuracy(batch)= 98.00
epoch= 5, i=  200, loss(batch)= 0.0830, accuracy(batch)= 100.00
epoch= 5, i=  300, loss(batch)= 0.1148, accuracy(batch)= 99.00
epoch= 5, i=  400, loss(batch)= 0.0776, accuracy(batch)= 100.00
epoch= 5, i=  500, loss(batch)= 0.0837, accuracy(batch)= 99.00
epoch= 5, loss(train)= 0.098, accuracy(train)= 99.133, time= 6.945, lr= 0.04073
  accuracy(test) = 99.140 %, time= 0.332
epoch= 6, i=  100, loss(batch)= 0.0686, accuracy(batch)= 100.00
epoch= 6, i=  200, loss(batch)= 0.0952, accuracy(batch)= 99.00
epoch= 6, i=  300, loss(batch)= 0.0655, accuracy(batch)= 100.00
epoch= 6, i=  400, loss(batch)= 0.0852, accuracy(batch)= 99.00
epoch= 6, i=  500, loss(batch)= 0.0977, accuracy(batch)= 98.00
epoch= 6, loss(train)= 0.089, accuracy(train)= 99.222, time= 6.999, lr= 0.03869
  accuracy(test) = 99.250 %, time= 0.332
epoch= 7, i=  100, loss(batch)= 0.0697, accuracy(batch)= 99.00
epoch= 7, i=  200, loss(batch)= 0.0779, accuracy(batch)= 100.00
epoch= 7, i=  300, loss(batch)= 0.0649, accuracy(batch)= 100.00
epoch= 7, i=  400, loss(batch)= 0.0802, accuracy(batch)= 99.00
epoch= 7, i=  500, loss(batch)= 0.0695, accuracy(batch)= 100.00
epoch= 7, loss(train)= 0.081, accuracy(train)= 99.282, time= 6.983, lr= 0.03675
  accuracy(test) = 99.250 %, time= 0.332
epoch= 8, i=  100, loss(batch)= 0.0647, accuracy(batch)= 99.00
epoch= 8, i=  200, loss(batch)= 0.0621, accuracy(batch)= 100.00
epoch= 8, i=  300, loss(batch)= 0.0597, accuracy(batch)= 100.00
epoch= 8, i=  400, loss(batch)= 0.0584, accuracy(batch)= 100.00
epoch= 8, i=  500, loss(batch)= 0.0705, accuracy(batch)= 99.00
epoch= 8, loss(train)= 0.075, accuracy(train)= 99.389, time= 6.964, lr= 0.03492
  accuracy(test) = 99.270 %, time= 0.332
epoch= 9, i=  100, loss(batch)= 0.0586, accuracy(batch)= 100.00
epoch= 9, i=  200, loss(batch)= 0.0761, accuracy(batch)= 99.00
epoch= 9, i=  300, loss(batch)= 0.0554, accuracy(batch)= 100.00
epoch= 9, i=  400, loss(batch)= 0.0877, accuracy(batch)= 98.00
epoch= 9, i=  500, loss(batch)= 0.0639, accuracy(batch)= 100.00
epoch= 9, loss(train)= 0.070, accuracy(train)= 99.455, time= 6.962, lr= 0.03317
  accuracy(test) = 99.320 %, time= 0.332
epoch= 10, i=  100, loss(batch)= 0.0797, accuracy(batch)= 99.00
epoch= 10, i=  200, loss(batch)= 0.0581, accuracy(batch)= 100.00
epoch= 10, i=  300, loss(batch)= 0.0611, accuracy(batch)= 99.00
epoch= 10, i=  400, loss(batch)= 0.0520, accuracy(batch)= 100.00
epoch= 10, i=  500, loss(batch)= 0.0996, accuracy(batch)= 98.00
epoch= 10, loss(train)= 0.065, accuracy(train)= 99.482, time= 6.952, lr= 0.03151
  accuracy(test) = 99.150 %, time= 0.332
epoch= 11, i=  100, loss(batch)= 0.0489, accuracy(batch)= 100.00
epoch= 11, i=  200, loss(batch)= 0.0475, accuracy(batch)= 100.00
epoch= 11, i=  300, loss(batch)= 0.0547, accuracy(batch)= 100.00
epoch= 11, i=  400, loss(batch)= 0.0627, accuracy(batch)= 99.00
epoch= 11, i=  500, loss(batch)= 0.0535, accuracy(batch)= 100.00
epoch= 11, loss(train)= 0.062, accuracy(train)= 99.518, time= 7.004, lr= 0.02994
  accuracy(test) = 99.400 %, time= 0.332
epoch= 12, i=  100, loss(batch)= 0.0476, accuracy(batch)= 100.00
epoch= 12, i=  200, loss(batch)= 0.0720, accuracy(batch)= 98.00
epoch= 12, i=  300, loss(batch)= 0.0556, accuracy(batch)= 99.00
epoch= 12, i=  400, loss(batch)= 0.0575, accuracy(batch)= 100.00
epoch= 12, i=  500, loss(batch)= 0.0795, accuracy(batch)= 99.00
epoch= 12, loss(train)= 0.059, accuracy(train)= 99.604, time= 6.957, lr= 0.02844
  accuracy(test) = 99.380 %, time= 0.333
epoch= 13, i=  100, loss(batch)= 0.0440, accuracy(batch)= 100.00
epoch= 13, i=  200, loss(batch)= 0.0507, accuracy(batch)= 100.00
epoch= 13, i=  300, loss(batch)= 0.0643, accuracy(batch)= 99.00
epoch= 13, i=  400, loss(batch)= 0.0956, accuracy(batch)= 98.00
epoch= 13, i=  500, loss(batch)= 0.0489, accuracy(batch)= 100.00
epoch= 13, loss(train)= 0.056, accuracy(train)= 99.602, time= 6.963, lr= 0.02702
  accuracy(test) = 99.360 %, time= 0.333
epoch= 14, i=  100, loss(batch)= 0.0465, accuracy(batch)= 100.00
epoch= 14, i=  200, loss(batch)= 0.0441, accuracy(batch)= 100.00
epoch= 14, i=  300, loss(batch)= 0.0504, accuracy(batch)= 100.00
epoch= 14, i=  400, loss(batch)= 0.0433, accuracy(batch)= 100.00
epoch= 14, i=  500, loss(batch)= 0.0581, accuracy(batch)= 100.00
epoch= 14, loss(train)= 0.054, accuracy(train)= 99.605, time= 6.990, lr= 0.02567
  accuracy(test) = 99.330 %, time= 0.333
epoch= 15, i=  100, loss(batch)= 0.0475, accuracy(batch)= 100.00
epoch= 15, i=  200, loss(batch)= 0.0428, accuracy(batch)= 100.00
epoch= 15, i=  300, loss(batch)= 0.0437, accuracy(batch)= 100.00
epoch= 15, i=  400, loss(batch)= 0.0945, accuracy(batch)= 98.00
epoch= 15, i=  500, loss(batch)= 0.0798, accuracy(batch)= 99.00
epoch= 15, loss(train)= 0.053, accuracy(train)= 99.613, time= 6.993, lr= 0.02438
  accuracy(test) = 99.320 %, time= 0.333
epoch= 16, i=  100, loss(batch)= 0.0403, accuracy(batch)= 100.00
epoch= 16, i=  200, loss(batch)= 0.0571, accuracy(batch)= 99.00
epoch= 16, i=  300, loss(batch)= 0.0444, accuracy(batch)= 100.00
epoch= 16, i=  400, loss(batch)= 0.0985, accuracy(batch)= 99.00
epoch= 16, i=  500, loss(batch)= 0.0457, accuracy(batch)= 100.00
epoch= 16, loss(train)= 0.051, accuracy(train)= 99.669, time= 6.950, lr= 0.02316
  accuracy(test) = 99.300 %, time= 0.333
epoch= 17, i=  100, loss(batch)= 0.1177, accuracy(batch)= 97.00
epoch= 17, i=  200, loss(batch)= 0.0484, accuracy(batch)= 99.00
epoch= 17, i=  300, loss(batch)= 0.0496, accuracy(batch)= 99.00
epoch= 17, i=  400, loss(batch)= 0.0426, accuracy(batch)= 100.00
epoch= 17, i=  500, loss(batch)= 0.0460, accuracy(batch)= 100.00
epoch= 17, loss(train)= 0.049, accuracy(train)= 99.695, time= 6.899, lr= 0.02201
  accuracy(test) = 99.380 %, time= 0.333
epoch= 18, i=  100, loss(batch)= 0.0480, accuracy(batch)= 99.00
epoch= 18, i=  200, loss(batch)= 0.0446, accuracy(batch)= 100.00
epoch= 18, i=  300, loss(batch)= 0.0415, accuracy(batch)= 100.00
epoch= 18, i=  400, loss(batch)= 0.0408, accuracy(batch)= 100.00
epoch= 18, i=  500, loss(batch)= 0.0376, accuracy(batch)= 100.00
epoch= 18, loss(train)= 0.048, accuracy(train)= 99.704, time= 6.905, lr= 0.02091
  accuracy(test) = 99.350 %, time= 0.333
epoch= 19, i=  100, loss(batch)= 0.0413, accuracy(batch)= 100.00
epoch= 19, i=  200, loss(batch)= 0.0397, accuracy(batch)= 100.00
epoch= 19, i=  300, loss(batch)= 0.0434, accuracy(batch)= 100.00
epoch= 19, i=  400, loss(batch)= 0.0434, accuracy(batch)= 100.00
epoch= 19, i=  500, loss(batch)= 0.0458, accuracy(batch)= 100.00
epoch= 19, loss(train)= 0.046, accuracy(train)= 99.725, time= 6.908, lr= 0.01986
  accuracy(test) = 99.390 %, time= 0.334
epoch= 20, i=  100, loss(batch)= 0.0365, accuracy(batch)= 100.00
epoch= 20, i=  200, loss(batch)= 0.0454, accuracy(batch)= 100.00
epoch= 20, i=  300, loss(batch)= 0.0360, accuracy(batch)= 100.00
epoch= 20, i=  400, loss(batch)= 0.0598, accuracy(batch)= 98.00
epoch= 20, i=  500, loss(batch)= 0.0364, accuracy(batch)= 100.00
epoch= 20, loss(train)= 0.045, accuracy(train)= 99.736, time= 6.908, lr= 0.01887
  accuracy(test) = 99.310 %, time= 0.333
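
After 20 epochs the network reaches roughly 99.3-99.4% test accuracy, in line with LeNet5-style results on MNIST.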
