In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import namedtuple

from PIL import Image
import os
import os.path
import errno
import codecs
import copy

In [2]:
torch.manual_seed(0)
np.random.seed(0)
print("torch.cuda.device_count()", torch.cuda.device_count())
print("torch.cuda.current_device()", torch.cuda.current_device())
torch.cuda.set_device(1)
print("torch.cuda.current_device()", torch.cuda.current_device())


torch.cuda.device_count() 4
torch.cuda.current_device() 0
Out[2]:
1
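
Side note: this cell pins computation to GPU 1 of the 4 available. In later PyTorch releases (0.4+) the same selection is usually written with the torch.device API; a minimal sketch for reference only, since the rest of this notebook keeps the older .cuda() idiom:

# a sketch, assuming PyTorch >= 0.4
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# tensors and models are then moved with .to(device) instead of .cuda()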

In [3]:
class MNIST(torch.utils.data.Dataset):
  """`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
  Args:
    root (string): Root directory of dataset where ``processed/training.pt``
      and ``processed/test.pt`` exist.
    dataset (string): If `train` or `valid`, creates dataset from ``training.pt``,
      otherwise from ``test.pt``.
    download (bool, optional): If true, downloads the dataset from the internet and
      puts it in root directory. If dataset is already downloaded, it is not
      downloaded again.
    transform (callable, optional): A function/transform that takes in a PIL image
      and returns a transformed version. E.g., ``transforms.RandomCrop``
    target_transform (callable, optional): A function/transform that takes in the
      target and transforms it.
  """
  urls = [
    'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
    'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
  ]
  raw_folder = 'raw'
  processed_folder = 'processed'
  training_file = 'training.pt'
  test_file = 'test.pt'

  def __init__(self, root, dataset='train', transform=None, target_transform=None, download=False):
    self.root = os.path.expanduser(root)
    self.transform = transform
    self.target_transform = target_transform
    self.dataset = dataset  # 'train', 'valid', or 'test'
    self.cutoff = 50000  # first 50,000 examples are train, the remaining 10,000 are validation

    if download:
      self.download()

    if not self._check_exists():
      raise RuntimeError('Dataset not found.' +
                        ' You can use download=True to download it')

    if self.dataset == 'train' or self.dataset == 'valid':
      full_train_data, full_train_labels = torch.load(os.path.join(self.root, self.processed_folder, self.training_file))
      if self.dataset == 'train':
        self.data = full_train_data[:self.cutoff]
        self.labels = full_train_labels[:self.cutoff]
      else:
        self.data = full_train_data[self.cutoff:]
        self.labels = full_train_labels[self.cutoff:]
    else:
      self.data, self.labels = torch.load(os.path.join(self.root, self.processed_folder, self.test_file))

  def __getitem__(self, index):
      """
      Args:
        index (int): Index
      Returns:
        tuple: (image, target) where target is index of the target class.
      """
      img, target = self.data[index], self.labels[index]

      # doing this so that it is consistent with all other datasets
      # to return a PIL Image
      img = Image.fromarray(img.numpy(), mode='L')

      if self.transform is not None:
        img = self.transform(img)

      if self.target_transform is not None:
        target = self.target_transform(target)

      return img, target

  def __len__(self):
    return len(self.data)

  def _check_exists(self):
    return os.path.exists(os.path.join(self.root, self.processed_folder, self.training_file)) and \
      os.path.exists(os.path.join(self.root, self.processed_folder, self.test_file))

  def download(self):
    """Download the MNIST data if it doesn't exist in processed_folder already."""
    from six.moves import urllib
    import gzip

    if self._check_exists():
      return

    # download files
    try:
      os.makedirs(os.path.join(self.root, self.raw_folder))
      os.makedirs(os.path.join(self.root, self.processed_folder))
    except OSError as e:
      if e.errno == errno.EEXIST:
        pass
      else:
        raise

    for url in self.urls:
      print('Downloading ' + url)
      data = urllib.request.urlopen(url)
      filename = url.rpartition('/')[2]
      file_path = os.path.join(self.root, self.raw_folder, filename)
      with open(file_path, 'wb') as f:
        f.write(data.read())
      with open(file_path.replace('.gz', ''), 'wb') as out_f, \
          gzip.GzipFile(file_path) as zip_f:
        out_f.write(zip_f.read())
      os.unlink(file_path)

    # process and save as torch files
    print('Processing...')

    training_set = (
      read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')),
      read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte'))
    )
    test_set = (
      read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')),
      read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'))
    )
    with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f:
      torch.save(training_set, f)
    with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f:
      torch.save(test_set, f)

    print('Done!')


def get_int(b):
  return int(codecs.encode(b, 'hex'), 16)


def parse_byte(b):
  if isinstance(b, str):
    return ord(b)
  return b


def read_label_file(path):
  with open(path, 'rb') as f:
    data = f.read()
    assert get_int(data[:4]) == 2049
    length = get_int(data[4:8])
    labels = [parse_byte(b) for b in data[8:]]
    assert len(labels) == length
    return torch.LongTensor(labels)


def read_image_file(path):
  with open(path, 'rb') as f:
    data = f.read()
    assert get_int(data[:4]) == 2051
    length = get_int(data[4:8])
    num_rows = get_int(data[8:12])
    num_cols = get_int(data[12:16])
    images = []
    idx = 16
    for l in range(length):
      img = []
      images.append(img)
      for r in range(num_rows):
        row = []
        img.append(row)
        for c in range(num_cols):
          row.append(parse_byte(data[idx]))
          idx += 1
    assert len(images) == length
    return torch.ByteTensor(images).view(-1, 28, 28)
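
The readers above walk the IDX files byte by byte, which is slow. A vectorized sketch of the same parse, assuming numpy is available (read_image_file_np is a hypothetical alternative, not used below): the image header is four big-endian int32s (magic 2051, count, rows, cols) followed by the raw uint8 pixels.

def read_image_file_np(path):
  with open(path, 'rb') as f:
    data = f.read()
  magic, length, rows, cols = np.frombuffer(data[:16], dtype='>i4')
  assert magic == 2051
  # the frombuffer result is read-only, so copy before handing it to torch
  images = np.frombuffer(data[16:], dtype=np.uint8).reshape(length, rows, cols).copy()
  return torch.from_numpy(images)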

In [4]:
Args = namedtuple('Args', ['batch_size', 'test_batch_size', 'epochs', 'lr', 'cuda', 'seed', 'log_interval'])

In [5]:
args = Args(batch_size=1000, test_batch_size=1000, epochs=30, lr=0.001, cuda=True, seed=0, log_interval=10)

In [6]:
torch.manual_seed(args.seed)
if args.cuda:
  torch.cuda.manual_seed(args.seed)

In [7]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

train_loader = torch.utils.data.DataLoader(
  MNIST('MNIST_data', dataset='train', download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
  batch_size=args.batch_size, shuffle=True, **kwargs)

valid_loader = torch.utils.data.DataLoader(
  MNIST('MNIST_data', dataset='valid', download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
  batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
  MNIST('MNIST_data', dataset='test', transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
  batch_size=args.test_batch_size, shuffle=True, **kwargs)
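
The Normalize constants (0.1307, 0.3081) are the conventional mean and standard deviation of the MNIST training pixels after ToTensor scales them to [0, 1]. A quick sketch to verify them (approximately, since it only sees the 50k train split; slow, and not needed for the rest of the notebook):

raw = MNIST('MNIST_data', dataset='train', transform=transforms.ToTensor())
pixels = torch.cat([img.view(-1) for img, _ in raw])
print(pixels.mean(), pixels.std())  # roughly 0.1307 and 0.3081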

In [8]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.num_filter1 = 8
    self.num_filter2 = 16
    self.num_padding = 2
    # input is 28x28
    # padding=2 for same padding
    self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
    # feature map size is 14*14 by pooling
    # padding=2 for same padding
    self.conv2 = nn.Conv2d(self.num_filter1, self.num_filter2, 5, padding=self.num_padding)
    # feature map size is 7*7 by pooling
    self.fc = nn.Linear(self.num_filter2*7*7, 10)

  def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), 2)
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    x = x.view(-1, self.num_filter2*7*7)   # flatten to (batch, 16*7*7)
    x = self.fc(x)
    return F.log_softmax(x, dim=1)
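
A quick shape sanity check (a sketch): padding=2 with 5x5 kernels gives 'same' convolutions, and each 2x2 max-pool halves the spatial size, so 28x28 -> 14x14 -> 7x7 and the flattened feature size is 16*7*7 = 784.

net = Net()
x = Variable(torch.zeros(1, 1, 28, 28))  # one dummy grayscale image
h1 = F.max_pool2d(F.relu(net.conv1(x)), 2)
h2 = F.max_pool2d(F.relu(net.conv2(h1)), 2)
print(h1.size(), h2.size())  # (1, 8, 14, 14) and (1, 16, 7, 7)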

In [9]:
model = Net()
if args.cuda:
  model.cuda()

orig_model = copy.deepcopy(model)

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

In [10]:
def train(epoch, model, train_loader):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    if args.cuda:
      data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % args.log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.data[0]))
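
Since forward() ends in log_softmax, nll_loss on the output is exactly cross-entropy on the raw logits. A small sketch of the equivalence:

z = Variable(torch.randn(4, 10))             # fake logits for 4 examples
y = Variable(torch.LongTensor([1, 0, 3, 9])) # fake targets
print(F.nll_loss(F.log_softmax(z, dim=1), y))
print(F.cross_entropy(z, y))                 # same value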

In [11]:
def evaluate(model, test_loader):
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_loader:
    if args.cuda:
      data, target = data.cuda(), target.cuda()
    data, target = Variable(data, volatile=True), Variable(target)
    output = model(data)
    test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
    pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss /= len(test_loader.dataset)
  acc = correct / len(test_loader.dataset)
  print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * acc))
  return acc
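
Variable(..., volatile=True) is the pre-0.4 way of disabling autograd during inference. In PyTorch 0.4+ the same loop would sit inside torch.no_grad(); a sketch assuming the newer API (evaluate_no_grad is hypothetical and not used below):

def evaluate_no_grad(model, loader):
  model.eval()
  correct = 0
  with torch.no_grad():  # no graph is built, saving memory
    for data, target in loader:
      if args.cuda:
        data, target = data.cuda(), target.cuda()
      pred = model(data).max(1, keepdim=True)[1]
      correct += pred.eq(target.view_as(pred)).sum().item()
  return correct / len(loader.dataset)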

In [12]:
best_valid_acc = 0
for epoch in range(1, args.epochs + 1):
  train(epoch, model, train_loader)
  valid_acc = evaluate(model, valid_loader)
  if valid_acc >= best_valid_acc:
    best_valid_acc = valid_acc
    best_model = copy.deepcopy(model)
    print('best valid_acc', best_valid_acc * 100.)

eval_acc = evaluate(best_model, test_loader)
print('final test acc', eval_acc * 100.)


Train Epoch: 1 [0/50000 (0%)]	Loss: 2.303826
Train Epoch: 1 [10000/50000 (20%)]	Loss: 1.677860
Train Epoch: 1 [20000/50000 (40%)]	Loss: 0.841788
Train Epoch: 1 [30000/50000 (60%)]	Loss: 0.508113
Train Epoch: 1 [40000/50000 (80%)]	Loss: 0.429086

Average loss: 0.3061, Accuracy: 9138/10000 (91.3800%)

best valid_acc 91.38
Train Epoch: 2 [0/50000 (0%)]	Loss: 0.322196
Train Epoch: 2 [10000/50000 (20%)]	Loss: 0.280522
Train Epoch: 2 [20000/50000 (40%)]	Loss: 0.266068
Train Epoch: 2 [30000/50000 (60%)]	Loss: 0.249113
Train Epoch: 2 [40000/50000 (80%)]	Loss: 0.212267

Average loss: 0.1894, Accuracy: 9464/10000 (94.6400%)

best valid_acc 94.64
Train Epoch: 3 [0/50000 (0%)]	Loss: 0.169671
Train Epoch: 3 [10000/50000 (20%)]	Loss: 0.187899
Train Epoch: 3 [20000/50000 (40%)]	Loss: 0.179214
Train Epoch: 3 [30000/50000 (60%)]	Loss: 0.184280
Train Epoch: 3 [40000/50000 (80%)]	Loss: 0.199595

Average loss: 0.1434, Accuracy: 9612/10000 (96.1200%)

best valid_acc 96.12
Train Epoch: 4 [0/50000 (0%)]	Loss: 0.142606
Train Epoch: 4 [10000/50000 (20%)]	Loss: 0.128802
Train Epoch: 4 [20000/50000 (40%)]	Loss: 0.115742
Train Epoch: 4 [30000/50000 (60%)]	Loss: 0.123769
Train Epoch: 4 [40000/50000 (80%)]	Loss: 0.124975

Average loss: 0.1124, Accuracy: 9696/10000 (96.9600%)

best valid_acc 96.96000000000001
Train Epoch: 5 [0/50000 (0%)]	Loss: 0.103727
Train Epoch: 5 [10000/50000 (20%)]	Loss: 0.092302
Train Epoch: 5 [20000/50000 (40%)]	Loss: 0.103799
Train Epoch: 5 [30000/50000 (60%)]	Loss: 0.079889
Train Epoch: 5 [40000/50000 (80%)]	Loss: 0.092919

Average loss: 0.0918, Accuracy: 9739/10000 (97.3900%)

best valid_acc 97.39
Train Epoch: 6 [0/50000 (0%)]	Loss: 0.083168
Train Epoch: 6 [10000/50000 (20%)]	Loss: 0.110532
Train Epoch: 6 [20000/50000 (40%)]	Loss: 0.093374
Train Epoch: 6 [30000/50000 (60%)]	Loss: 0.075025
Train Epoch: 6 [40000/50000 (80%)]	Loss: 0.095143

Average loss: 0.0791, Accuracy: 9771/10000 (97.7100%)

best valid_acc 97.71
Train Epoch: 7 [0/50000 (0%)]	Loss: 0.069611
Train Epoch: 7 [10000/50000 (20%)]	Loss: 0.061767
Train Epoch: 7 [20000/50000 (40%)]	Loss: 0.079088
Train Epoch: 7 [30000/50000 (60%)]	Loss: 0.059623
Train Epoch: 7 [40000/50000 (80%)]	Loss: 0.084319

Average loss: 0.0741, Accuracy: 9786/10000 (97.8600%)

best valid_acc 97.86
Train Epoch: 8 [0/50000 (0%)]	Loss: 0.073275
Train Epoch: 8 [10000/50000 (20%)]	Loss: 0.081987
Train Epoch: 8 [20000/50000 (40%)]	Loss: 0.082448
Train Epoch: 8 [30000/50000 (60%)]	Loss: 0.068978
Train Epoch: 8 [40000/50000 (80%)]	Loss: 0.063742

Average loss: 0.0691, Accuracy: 9796/10000 (97.9600%)

best valid_acc 97.96000000000001
Train Epoch: 9 [0/50000 (0%)]	Loss: 0.064298
Train Epoch: 9 [10000/50000 (20%)]	Loss: 0.065013
Train Epoch: 9 [20000/50000 (40%)]	Loss: 0.069385
Train Epoch: 9 [30000/50000 (60%)]	Loss: 0.041582
Train Epoch: 9 [40000/50000 (80%)]	Loss: 0.066154

Average loss: 0.0631, Accuracy: 9815/10000 (98.1500%)

best valid_acc 98.15
Train Epoch: 10 [0/50000 (0%)]	Loss: 0.058424
Train Epoch: 10 [10000/50000 (20%)]	Loss: 0.061783
Train Epoch: 10 [20000/50000 (40%)]	Loss: 0.037734
Train Epoch: 10 [30000/50000 (60%)]	Loss: 0.068406
Train Epoch: 10 [40000/50000 (80%)]	Loss: 0.063054

Average loss: 0.0604, Accuracy: 9816/10000 (98.1600%)

best valid_acc 98.16
Train Epoch: 11 [0/50000 (0%)]	Loss: 0.038760
Train Epoch: 11 [10000/50000 (20%)]	Loss: 0.064318
Train Epoch: 11 [20000/50000 (40%)]	Loss: 0.044094
Train Epoch: 11 [30000/50000 (60%)]	Loss: 0.053251
Train Epoch: 11 [40000/50000 (80%)]	Loss: 0.041955

Average loss: 0.0585, Accuracy: 9827/10000 (98.2700%)

best valid_acc 98.27
Train Epoch: 12 [0/50000 (0%)]	Loss: 0.029978
Train Epoch: 12 [10000/50000 (20%)]	Loss: 0.032916
Train Epoch: 12 [20000/50000 (40%)]	Loss: 0.041434
Train Epoch: 12 [30000/50000 (60%)]	Loss: 0.067432
Train Epoch: 12 [40000/50000 (80%)]	Loss: 0.058171

Average loss: 0.0535, Accuracy: 9839/10000 (98.3900%)

best valid_acc 98.39
Train Epoch: 13 [0/50000 (0%)]	Loss: 0.036091
Train Epoch: 13 [10000/50000 (20%)]	Loss: 0.057308
Train Epoch: 13 [20000/50000 (40%)]	Loss: 0.058362
Train Epoch: 13 [30000/50000 (60%)]	Loss: 0.077994
Train Epoch: 13 [40000/50000 (80%)]	Loss: 0.033563

Average loss: 0.0561, Accuracy: 9831/10000 (98.3100%)

Train Epoch: 14 [0/50000 (0%)]	Loss: 0.044331
Train Epoch: 14 [10000/50000 (20%)]	Loss: 0.049645
Train Epoch: 14 [20000/50000 (40%)]	Loss: 0.032262
Train Epoch: 14 [30000/50000 (60%)]	Loss: 0.036172
Train Epoch: 14 [40000/50000 (80%)]	Loss: 0.031683

Average loss: 0.0522, Accuracy: 9849/10000 (98.4900%)

best valid_acc 98.49
Train Epoch: 15 [0/50000 (0%)]	Loss: 0.034813
Train Epoch: 15 [10000/50000 (20%)]	Loss: 0.046153
Train Epoch: 15 [20000/50000 (40%)]	Loss: 0.041864
Train Epoch: 15 [30000/50000 (60%)]	Loss: 0.027925
Train Epoch: 15 [40000/50000 (80%)]	Loss: 0.033366

Average loss: 0.0552, Accuracy: 9841/10000 (98.4100%)

Train Epoch: 16 [0/50000 (0%)]	Loss: 0.031628
Train Epoch: 16 [10000/50000 (20%)]	Loss: 0.035893
Train Epoch: 16 [20000/50000 (40%)]	Loss: 0.057490
Train Epoch: 16 [30000/50000 (60%)]	Loss: 0.041925
Train Epoch: 16 [40000/50000 (80%)]	Loss: 0.034331

Average loss: 0.0552, Accuracy: 9839/10000 (98.3900%)

Train Epoch: 17 [0/50000 (0%)]	Loss: 0.028255
Train Epoch: 17 [10000/50000 (20%)]	Loss: 0.034863
Train Epoch: 17 [20000/50000 (40%)]	Loss: 0.038391
Train Epoch: 17 [30000/50000 (60%)]	Loss: 0.037168
Train Epoch: 17 [40000/50000 (80%)]	Loss: 0.034141

Average loss: 0.0493, Accuracy: 9858/10000 (98.5800%)

best valid_acc 98.58
Train Epoch: 18 [0/50000 (0%)]	Loss: 0.029900
Train Epoch: 18 [10000/50000 (20%)]	Loss: 0.046907
Train Epoch: 18 [20000/50000 (40%)]	Loss: 0.028100
Train Epoch: 18 [30000/50000 (60%)]	Loss: 0.029808
Train Epoch: 18 [40000/50000 (80%)]	Loss: 0.029611

Average loss: 0.0473, Accuracy: 9867/10000 (98.6700%)

best valid_acc 98.67
Train Epoch: 19 [0/50000 (0%)]	Loss: 0.037320
Train Epoch: 19 [10000/50000 (20%)]	Loss: 0.044111
Train Epoch: 19 [20000/50000 (40%)]	Loss: 0.041239
Train Epoch: 19 [30000/50000 (60%)]	Loss: 0.032999
Train Epoch: 19 [40000/50000 (80%)]	Loss: 0.036658

Average loss: 0.0474, Accuracy: 9863/10000 (98.6300%)

Train Epoch: 20 [0/50000 (0%)]	Loss: 0.026344
Train Epoch: 20 [10000/50000 (20%)]	Loss: 0.027241
Train Epoch: 20 [20000/50000 (40%)]	Loss: 0.023188
Train Epoch: 20 [30000/50000 (60%)]	Loss: 0.018313
Train Epoch: 20 [40000/50000 (80%)]	Loss: 0.044020

Average loss: 0.0487, Accuracy: 9857/10000 (98.5700%)

Train Epoch: 21 [0/50000 (0%)]	Loss: 0.042182
Train Epoch: 21 [10000/50000 (20%)]	Loss: 0.034582
Train Epoch: 21 [20000/50000 (40%)]	Loss: 0.037214
Train Epoch: 21 [30000/50000 (60%)]	Loss: 0.027711
Train Epoch: 21 [40000/50000 (80%)]	Loss: 0.033280

Average loss: 0.0468, Accuracy: 9870/10000 (98.7000%)

best valid_acc 98.7
Train Epoch: 22 [0/50000 (0%)]	Loss: 0.023809
Train Epoch: 22 [10000/50000 (20%)]	Loss: 0.030482
Train Epoch: 22 [20000/50000 (40%)]	Loss: 0.026654
Train Epoch: 22 [30000/50000 (60%)]	Loss: 0.021818
Train Epoch: 22 [40000/50000 (80%)]	Loss: 0.022738

Average loss: 0.0490, Accuracy: 9862/10000 (98.6200%)

Train Epoch: 23 [0/50000 (0%)]	Loss: 0.024872
Train Epoch: 23 [10000/50000 (20%)]	Loss: 0.042878
Train Epoch: 23 [20000/50000 (40%)]	Loss: 0.031781
Train Epoch: 23 [30000/50000 (60%)]	Loss: 0.025498
Train Epoch: 23 [40000/50000 (80%)]	Loss: 0.030885

Average loss: 0.0451, Accuracy: 9873/10000 (98.7300%)

best valid_acc 98.72999999999999
Train Epoch: 24 [0/50000 (0%)]	Loss: 0.030728
Train Epoch: 24 [10000/50000 (20%)]	Loss: 0.025797
Train Epoch: 24 [20000/50000 (40%)]	Loss: 0.021019
Train Epoch: 24 [30000/50000 (60%)]	Loss: 0.023815
Train Epoch: 24 [40000/50000 (80%)]	Loss: 0.033169

Average loss: 0.0454, Accuracy: 9873/10000 (98.7300%)

best valid_acc 98.72999999999999
Train Epoch: 25 [0/50000 (0%)]	Loss: 0.025571
Train Epoch: 25 [10000/50000 (20%)]	Loss: 0.023449
Train Epoch: 25 [20000/50000 (40%)]	Loss: 0.025415
Train Epoch: 25 [30000/50000 (60%)]	Loss: 0.018114
Train Epoch: 25 [40000/50000 (80%)]	Loss: 0.033626

Average loss: 0.0482, Accuracy: 9869/10000 (98.6900%)

Train Epoch: 26 [0/50000 (0%)]	Loss: 0.018225
Train Epoch: 26 [10000/50000 (20%)]	Loss: 0.021161
Train Epoch: 26 [20000/50000 (40%)]	Loss: 0.022208
Train Epoch: 26 [30000/50000 (60%)]	Loss: 0.036804
Train Epoch: 26 [40000/50000 (80%)]	Loss: 0.035899

Average loss: 0.0441, Accuracy: 9884/10000 (98.8400%)

best valid_acc 98.83999999999999
Train Epoch: 27 [0/50000 (0%)]	Loss: 0.013573
Train Epoch: 27 [10000/50000 (20%)]	Loss: 0.016220
Train Epoch: 27 [20000/50000 (40%)]	Loss: 0.021805
Train Epoch: 27 [30000/50000 (60%)]	Loss: 0.030295
Train Epoch: 27 [40000/50000 (80%)]	Loss: 0.027188

Average loss: 0.0431, Accuracy: 9881/10000 (98.8100%)

Train Epoch: 28 [0/50000 (0%)]	Loss: 0.021916
Train Epoch: 28 [10000/50000 (20%)]	Loss: 0.019936
Train Epoch: 28 [20000/50000 (40%)]	Loss: 0.021940
Train Epoch: 28 [30000/50000 (60%)]	Loss: 0.010939
Train Epoch: 28 [40000/50000 (80%)]	Loss: 0.031478

Average loss: 0.0441, Accuracy: 9881/10000 (98.8100%)

Train Epoch: 29 [0/50000 (0%)]	Loss: 0.015474
Train Epoch: 29 [10000/50000 (20%)]	Loss: 0.014100
Train Epoch: 29 [20000/50000 (40%)]	Loss: 0.018405
Train Epoch: 29 [30000/50000 (60%)]	Loss: 0.016726
Train Epoch: 29 [40000/50000 (80%)]	Loss: 0.017310

Average loss: 0.0453, Accuracy: 9877/10000 (98.7700%)

Train Epoch: 30 [0/50000 (0%)]	Loss: 0.010478
Train Epoch: 30 [10000/50000 (20%)]	Loss: 0.024923
Train Epoch: 30 [20000/50000 (40%)]	Loss: 0.011567
Train Epoch: 30 [30000/50000 (60%)]	Loss: 0.021501
Train Epoch: 30 [40000/50000 (80%)]	Loss: 0.026448

Average loss: 0.0443, Accuracy: 9879/10000 (98.7900%)


Average loss: 0.0392, Accuracy: 9879/10000 (98.7900%)

final test acc 98.79

In [20]:
# note: this evaluates the last-epoch model rather than best_model,
# which is why the number differs slightly from the cell above
eval_acc = evaluate(model, test_loader)
print('final test acc', eval_acc * 100.)


Average loss: 0.0373, Accuracy: 9885/10000 (98.8500%)

final test acc 98.85000000000001

In [13]:
param_count = 0
for param in model.parameters():
  print(param.data.shape)
  param_count += np.prod(param.data.shape)
print(param_count)


torch.Size([8, 1, 5, 5])
torch.Size([8])
torch.Size([16, 8, 5, 5])
torch.Size([16])
torch.Size([10, 784])
torch.Size([10])
11274
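
The 11,274 total follows directly from the shapes printed above (a worked check):

conv1 = 8 * 1 * 5 * 5 + 8      # 208 weights + biases
conv2 = 16 * 8 * 5 * 5 + 16    # 3216
fc    = 10 * 16 * 7 * 7 + 10   # 7850 (note 16*7*7 = 784)
print(conv1 + conv2 + fc)      # 11274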

In [14]:
orig_params = []
for param in orig_model.parameters():
  orig_params.append(param.data.cpu().numpy().flatten())

In [15]:
orig_params_flat = np.concatenate(orig_params)

In [16]:
import matplotlib.pyplot as plt

In [17]:
_ = plt.hist(orig_params_flat, bins=200)
plt.show()

[histogram of orig_params_flat, 200 bins]

In [18]:
final_params = []
for param in best_model.parameters():
  final_params.append(param.data.cpu().numpy().flatten())
final_params_flat = np.concatenate(final_params)

In [19]:
_ = plt.hist(final_params_flat, bins=200)
plt.show()

[histogram of final_params_flat, 200 bins]