In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import namedtuple
from PIL import Image
import os
import os.path
import errno
import codecs
import copy
In [2]:
torch.manual_seed(0)
np.random.seed(0)
print("torch.cuda.device_count()", torch.cuda.device_count())
print("torch.cuda.current_device()", torch.cuda.current_device())
torch.cuda.set_device(1)  # pin work to the second GPU (assumes at least two devices are available)
print("torch.cuda.current_device()", torch.cuda.current_device())
In [3]:
class MNIST(torch.utils.data.Dataset):
"""`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
Args:
root (string): Root directory of dataset where ``processed/training.pt``
and ``processed/test.pt`` exist.
        dataset (string): One of ``train``, ``valid``, or ``test``. ``train`` and
            ``valid`` are both carved out of ``training.pt`` (the first 50000 and
            the last 10000 images, respectively); ``test`` loads ``test.pt``.
download (bool, optional): If true, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version, e.g., ``transforms.RandomCrop``.
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
urls = [
'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
]
raw_folder = 'raw'
processed_folder = 'processed'
training_file = 'training.pt'
test_file = 'test.pt'
def __init__(self, root, dataset='train', transform=None, target_transform=None, download=False):
self.root = os.path.expanduser(root)
self.transform = transform
self.target_transform = target_transform
self.dataset = dataset # 'train', 'valid', or 'test'
self.cutoff = 50000 # split between train vs validation
if download:
self.download()
if not self._check_exists():
raise RuntimeError('Dataset not found.' +
' You can use download=True to download it')
if self.dataset == 'train' or self.dataset == 'valid':
            full_train_data, full_train_labels = torch.load(
                os.path.join(self.root, self.processed_folder, self.training_file))
if self.dataset == 'train':
self.data = full_train_data[:self.cutoff]
self.labels = full_train_labels[:self.cutoff]
else:
self.data = full_train_data[self.cutoff:]
self.labels = full_train_labels[self.cutoff:]
else:
            self.data, self.labels = torch.load(
                os.path.join(self.root, self.processed_folder, self.test_file))
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (image, target) where target is index of the target class.
"""
img, target = self.data[index], self.labels[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = Image.fromarray(img.numpy(), mode='L')
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
target = self.target_transform(target)
return img, target
def __len__(self):
return len(self.data)
def _check_exists(self):
return os.path.exists(os.path.join(self.root, self.processed_folder, self.training_file)) and \
os.path.exists(os.path.join(self.root, self.processed_folder, self.test_file))
def download(self):
"""Download the MNIST data if it doesn't exist in processed_folder already."""
from six.moves import urllib
import gzip
if self._check_exists():
return
# download files
try:
os.makedirs(os.path.join(self.root, self.raw_folder))
os.makedirs(os.path.join(self.root, self.processed_folder))
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
for url in self.urls:
print('Downloading ' + url)
data = urllib.request.urlopen(url)
filename = url.rpartition('/')[2]
file_path = os.path.join(self.root, self.raw_folder, filename)
with open(file_path, 'wb') as f:
f.write(data.read())
with open(file_path.replace('.gz', ''), 'wb') as out_f, \
gzip.GzipFile(file_path) as zip_f:
out_f.write(zip_f.read())
os.unlink(file_path)
# process and save as torch files
print('Processing...')
training_set = (
read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')),
read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte'))
)
test_set = (
read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')),
read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'))
)
with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f:
torch.save(training_set, f)
with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f:
torch.save(test_set, f)
print('Done!')
def get_int(b):
return int(codecs.encode(b, 'hex'), 16)
def parse_byte(b):
if isinstance(b, str):
return ord(b)
return b
def read_label_file(path):
with open(path, 'rb') as f:
data = f.read()
assert get_int(data[:4]) == 2049
length = get_int(data[4:8])
labels = [parse_byte(b) for b in data[8:]]
assert len(labels) == length
return torch.LongTensor(labels)
def read_image_file(path):
with open(path, 'rb') as f:
data = f.read()
assert get_int(data[:4]) == 2051
length = get_int(data[4:8])
num_rows = get_int(data[8:12])
num_cols = get_int(data[12:16])
images = []
idx = 16
for l in range(length):
img = []
images.append(img)
for r in range(num_rows):
row = []
img.append(row)
for c in range(num_cols):
row.append(parse_byte(data[idx]))
idx += 1
assert len(images) == length
    return torch.ByteTensor(images).view(-1, num_rows, num_cols)
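The class can be smoke-tested on its own before it is wired into a DataLoader. A minimal sketch, using the same MNIST_data root directory as the loaders below:
In [ ]:
# Quick check of the custom dataset: lengths follow the 50000/10000 cutoff,
# and with no transform each item is a raw PIL image plus an integer label.
ds_train = MNIST('MNIST_data', dataset='train', download=True)
ds_valid = MNIST('MNIST_data', dataset='valid')
img, label = ds_train[0]
print(len(ds_train), len(ds_valid))  # 50000 10000
print(img.size, label)               # (28, 28) and a digit in 0..9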
In [4]:
Args = namedtuple('Args', ['batch_size', 'test_batch_size', 'epochs', 'lr', 'cuda', 'seed', 'log_interval'])
In [5]:
args = Args(batch_size=1000, test_batch_size=1000, epochs=30, lr=0.001, cuda=True, seed=0, log_interval=10)
In [6]:
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
In [7]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    MNIST('MNIST_data', dataset='train', download=True,
          transform=transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
valid_loader = torch.utils.data.DataLoader(
    MNIST('MNIST_data', dataset='valid', download=True,
          transform=transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=False, **kwargs)  # no need to shuffle for evaluation
test_loader = torch.utils.data.DataLoader(
    MNIST('MNIST_data', dataset='test',
          transform=transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.batch_size, shuffle=False, **kwargs)  # no need to shuffle for evaluation
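A quick sanity check that the three splits have the expected sizes (a sketch; the numbers follow from the cutoff of 50000 in the dataset class above):
In [ ]:
print(len(train_loader.dataset))  # expected: 50000
print(len(valid_loader.dataset))  # expected: 10000
print(len(test_loader.dataset))   # expected: 10000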
In [8]:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.num_filter1 = 8
self.num_filter2 = 16
self.num_padding = 2
# input is 28x28
# padding=2 for same padding
self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
# feature map size is 14*14 by pooling
# padding=2 for same padding
self.conv2 = nn.Conv2d(self.num_filter1, self.num_filter2, 5, padding=self.num_padding)
# feature map size is 7*7 by pooling
self.fc = nn.Linear(self.num_filter2*7*7, 10)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), 2)
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = x.view(-1, self.num_filter2*7*7) # reshape Variable
x = self.fc(x)
        return F.log_softmax(x, dim=1)  # dim=1: softmax over the 10 class scores
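The feature-map sizes claimed in the comments (28 -> 14 -> 7 after each conv/pool pair, thanks to the padding of 2 around the 5x5 kernels) can be verified with a dummy forward pass. A minimal sketch:
In [ ]:
# Verify the shape arithmetic in Net with a dummy batch of 4 images.
net = Net()
dummy = Variable(torch.zeros(4, 1, 28, 28))
h1 = F.max_pool2d(F.relu(net.conv1(dummy)), 2)
h2 = F.max_pool2d(F.relu(net.conv2(h1)), 2)
print(h1.size())          # (4, 8, 14, 14)
print(h2.size())          # (4, 16, 7, 7), flattened to 16*7*7 = 784 features
print(net(dummy).size())  # (4, 10) log-probabilities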
In [9]:
model = Net()
if args.cuda:
model.cuda()
orig_model = copy.deepcopy(model)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
In [10]:
def train(epoch, model, train_loader):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0]))
In [11]:
def evaluate(model, test_loader):
model.eval()
test_loss = 0
correct = 0
for data, target in test_loader:
if args.cuda:
data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)  # volatile: no autograd graph needed at eval time
output = model(data)
test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
    acc = correct / float(len(test_loader.dataset))  # float() guards against integer division
print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * acc))
return acc
In [12]:
best_valid_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch, model, train_loader)
valid_acc = evaluate(model, valid_loader)
    if valid_acc >= best_valid_acc:  # model selection: keep the model with the best validation accuracy
best_valid_acc = valid_acc
best_model = copy.deepcopy(model)
print('best valid_acc', best_valid_acc * 100.)
eval_acc = evaluate(best_model, test_loader)
print('final test acc', eval_acc * 100.)
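The deepcopy above duplicates the entire module every time validation accuracy improves. An equivalent, slightly lighter pattern (a sketch, not executed here) snapshots only the weights with state_dict and restores them once at the end:
In [ ]:
# Alternative model-selection pattern: snapshot weights instead of the module.
best_valid_acc, best_state = 0, None
for epoch in range(1, args.epochs + 1):
    train(epoch, model, train_loader)
    valid_acc = evaluate(model, valid_loader)
    if valid_acc >= best_valid_acc:
        best_valid_acc = valid_acc
        best_state = copy.deepcopy(model.state_dict())
model.load_state_dict(best_state)  # restore the best weights in place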
In [20]:
eval_acc = evaluate(model, test_loader)  # the last-epoch model, for comparison with best_model above
print('last-epoch test acc', eval_acc * 100.)
In [13]:
param_count = 0
for param in model.parameters():
print(param.data.shape)
    param_count += np.prod(param.data.shape)
print(param_count)
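The total can be checked by hand: each Conv2d holds out_channels * in_channels * 5 * 5 weights plus out_channels biases, and the Linear layer holds 10 * 784 weights plus 10 biases.
In [ ]:
# Hand-derived parameter count, matching the loop above:
# conv1: 8*1*5*5 + 8    = 208
# conv2: 16*8*5*5 + 16  = 3216
# fc:    10*16*7*7 + 10 = 7850
print(208 + 3216 + 7850)  # 11274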
In [14]:
orig_params = []
for param in orig_model.parameters():
orig_params.append(param.data.cpu().numpy().flatten())
In [15]:
orig_params_flat = np.concatenate(orig_params)
In [16]:
import matplotlib.pyplot as plt
In [17]:
_ = plt.hist(orig_params_flat, bins=200)
plt.show()
In [18]:
final_params = []
for param in best_model.parameters():
final_params.append(param.data.cpu().numpy().flatten())
final_params_flat = np.concatenate(final_params)
In [19]:
_ = plt.hist(final_params_flat, bins=200)
plt.show()
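Plotting both distributions on shared axes makes the before/after comparison easier to read. A small sketch using the arrays computed above:
In [ ]:
# Overlay the initial and trained weight distributions.
_ = plt.hist(orig_params_flat, bins=200, alpha=0.5, label='initial')
_ = plt.hist(final_params_flat, bins=200, alpha=0.5, label='trained')
plt.legend()
plt.show()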
In [ ]: