In [1]:
import torch
from torch import nn
from torch.utils import data
import os
import torchvision
from torchvision import datasets, transforms, models
import torchvision.transforms.functional as tf
from PIL import Image
import numpy as np
import random
import matplotlib.pyplot as plt
from tensorboardX import SummaryWriter
from tqdm import tqdm
# NOTE(review): GPU index 1 is hard-coded — assumes a machine with at least two
# CUDA devices; consider torch.device("cuda:1" if torch.cuda.is_available() else "cpu").
device = torch.device(1)
%matplotlib
# Seed PyTorch and force deterministic cuDNN kernels for run-to-run reproducibility.
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
In [2]:
class Dataset(data.Dataset):
    """Map-style dataset: loads one image file per index and pairs it with its label.

    Parameters
    ----------
    list_images : list of image file names (relative to ``folder_name``).
    labels : list of label vectors, one per image (e.g. one-hot ``[1, 0]``).
    folder_name : directory containing the image files (default current dir).
    """

    def __init__(self, list_images, labels, folder_name="./"):
        self.list_images = list_images
        self.labels = labels
        self.folder_name = folder_name

    def __len__(self):
        return len(self.list_images)

    def __load_images__(self, image_file_name):
        # .convert("RGB") guards against grayscale/palette/CMYK files, which would
        # otherwise break the (H, W, C) -> (C, H, W) transpose below.
        # resample=0 is NEAREST — kept from the original implementation.
        img = Image.open(os.path.join(self.folder_name, image_file_name))
        img = img.convert("RGB").resize((224, 224), resample=0)
        # HWC uint8 -> CHW float32 scaled to [0, 1].
        return torch.Tensor(np.asarray(img).transpose(2, 0, 1) / 255.0)

    def __getitem__(self, index):
        image = self.__load_images__(self.list_images[index])
        return image, torch.tensor(self.labels[index])
In [3]:
# Sort the directory listing so the (image, label) pairing is deterministic
# across runs and filesystems — os.listdir order is arbitrary.
image_list = sorted(os.listdir("data/train/"))
# One-hot targets: dog -> [1, 0], cat (everything else) -> [0, 1].
labels = [[1, 0] if name.startswith("dog") else [0, 1] for name in image_list]
In [4]:
# Build the training dataset over the Kaggle-style data/train/ folder.
training_set = Dataset(image_list, labels, folder_name="data/train/")
In [5]:
# 64-image batches, reshuffled each epoch, 6 worker processes for image decoding.
training_generator = data.DataLoader(training_set, batch_size=64, shuffle=True, num_workers=6)
In [6]:
# Smoke test: pull one batch to confirm shapes (expect [64, 3, 224, 224] / [64, 2]).
for fn, label in training_generator:
print(fn.shape, label.shape)
break
In [7]:
fn.shape
Out[7]:
In [8]:
def conv3x3(in_channels, out_channels, stride=1):
    """3x3 convolution, padding 1, no bias (bias is redundant before BatchNorm)."""
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
In [9]:
class BasicBlock(nn.Module):
    """Two-convolution residual block (ResNet-18/34 style).

    Computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + identity)``, where the
    identity path is optionally projected by ``downsample`` when the spatial
    size or channel count changes.
    """

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the identity path first when a downsample module is present.
        identity = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return self.relu(y + identity)
In [10]:
class ResNet(nn.Module):
    """ResNet backbone built from a user-supplied residual block class.

    Parameters
    ----------
    BasicBlock : block class, called as ``BasicBlock(inplanes, planes, stride, downsample)``.
    layers : sequence of four ints — number of blocks in each of the four stages.
    num_classes : output size of the final fully-connected layer (default 2).
    """

    def __init__(self, BasicBlock, layers, num_classes=2):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(BasicBlock, 64, layers[0])
        self.layer2 = self._make_layer(BasicBlock, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, layers[3], stride=2)
        # AdaptiveAvgPool2d generalizes the fixed AvgPool2d(7): identical output
        # for 224x224 inputs (layer4 emits 7x7 maps) and also works for other
        # input resolutions.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
        # NOTE(review): tanh on the final logits is unusual — BCEWithLogitsLoss
        # (used in training below) expects raw logits; kept to preserve existing
        # behavior, confirm intent.
        self.tanh = nn.Tanh()

    def _make_layer(self, BasicBlock, planes, num_blocks, stride=1):
        """Build one stage: the first block may downsample; the rest keep shape."""
        downsample = None
        if stride != 1 or self.inplanes != planes:
            # 1x1 projection so the identity path matches the block output.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes))
        layers = []
        layers.append(BasicBlock(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, num_blocks):
            layers.append(BasicBlock(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)      # 224x224 -> 112x112
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)    # -> 56x56
        x = self.layer1(x)     # 56x56
        x = self.layer2(x)     # -> 28x28
        x = self.layer3(x)     # -> 14x14
        x = self.layer4(x)     # -> 7x7
        x = self.avgpool(x)    # -> 1x1
        x = x.view(x.size(0), -1)
        x = self.tanh(self.fc(x))
        return x
In [11]:
def resnet34(**kwargs):
    """Construct a ResNet-34: BasicBlock with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, **kwargs)
In [12]:
# Move the network to the GPU and cast parameters/buffers to FP16; BatchNorm
# layers are converted back to float32 in a later cell for numerical stability.
model = resnet34().to(device).half()
In [13]:
def get_binary_accuracy(predicted, gold):
    """Fraction of rows where argmax(predicted) matches argmax(gold).

    Parameters
    ----------
    predicted : (N, C) tensor of scores.
    gold : (N, C) tensor of one-hot targets.

    Returns
    -------
    float accuracy in [0, 1].
    """
    predicted_argmax = torch.argmax(input=predicted, dim=1)
    gold_argmax = torch.argmax(input=gold, dim=1)
    # Vectorized mean instead of Python-level sum() iterating over the tensor —
    # same value, no per-element Python loop.
    return (predicted_argmax == gold_argmax).float().mean().item()
In [14]:
def pre_param_list(model: torch.nn.Module):
    """Build the FP16 (model) / FP32 (master) parameter lists for mixed precision.

    Returns
    -------
    model_params : the model's trainable (possibly half-precision) parameters.
    master_params : detached float32 copies with ``requires_grad=True`` so the
        optimizer can update them; gradients are copied over manually each step.
    """
    model_params = [p for p in model.parameters() if p.requires_grad]
    # Clone from the *filtered* list — the original cloned all parameters, which
    # desynchronizes the two lists whenever any parameter has requires_grad=False.
    master_params = [p.detach().clone().float() for p in model_params]
    # requires_grad must be (re)enabled on the master copies — detach() clears it.
    # The original set it on model_params instead, which was a no-op.
    for p in master_params:
        p.requires_grad = True
    return model_params, master_params
def master_param_to_model_param(model_param, master_param):
    """Copy the FP32 master weights back into the (possibly FP16) model weights."""
    for dst, src in zip(model_param, master_param):
        dst.data.copy_(src.data)
def model_grad_to_master_grad(model_param, master_param):
    """Copy the model's (FP16) gradients into the FP32 master params' .grad.

    The master .grad buffer is allocated lazily on first use; copy_ handles the
    half -> float dtype conversion.
    """
    for model, master in zip(model_param, master_param):
        if master.grad is None:
            # torch.autograd.Variable over an uninitialized .new(...) buffer is
            # deprecated; zeros_like allocates the same shape/dtype/device explicitly.
            master.grad = torch.zeros_like(master.data)
        master.grad.data.copy_(model.grad.data)
In [15]:
# Split into FP16 model parameters and FP32 master copies for mixed precision.
model_params, master_params = pre_param_list(model)
In [16]:
# NOTE(review): BCEWithLogitsLoss applies a sigmoid internally, but the model's
# forward already ends in tanh — double squashing; verify this is intended.
criteria = torch.nn.BCEWithLogitsLoss()
# The optimizer updates the FP32 master weights, not the FP16 model weights.
optimizer = torch.optim.SGD(master_params,lr=0.0001,momentum=0.99)
In [17]:
def convert_BN_to_float(module):
    """Cast every BatchNorm layer in `module` (including `module` itself) back
    to float32 — BN running statistics are numerically fragile in FP16.

    Returns `module` to allow chaining.
    """
    def _restore_bn(m):
        if isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
            m.float()
    # Module.apply visits module and every descendant, same as the recursive walk.
    module.apply(_restore_bn)
    return module
In [18]:
# convert_BN_to_float already recurses through the whole module tree, so one call
# on the root model is enough — the original looped over model.modules(), which
# re-traversed every subtree redundantly.
convert_BN_to_float(model)
In [19]:
# Mixed-precision training loop with static loss scaling and FP32 master weights.
scale_factor = 128
writer = SummaryWriter()
total_updates = 0
for epoch_no in tqdm(range(50)):
    for fn, label in training_generator:
        optimizer.zero_grad()
        # Forward pass in FP16 on the model's device.
        predict = model(fn.to(device).half())
        # Compute the loss in FP32 for numerical stability (predict is already
        # on `device`; label only needs a single cast to float).
        loss = criteria(predict.float(), label.to(device).float())
        # Scale the loss so small FP16 gradients do not underflow in backward.
        scaled_loss = loss.float() * scale_factor
        scaled_loss.backward()
        model_grad_to_master_grad(model_params, master_params)
        # BUG FIX: unscale the master gradients by the constant scale_factor.
        # The original divided by `scaled_loss` (the loss *value*), which varies
        # every step and is not the inverse of the scaling applied above.
        for p in master_params:
            p.grad.data.mul_(1.0 / scale_factor)
        optimizer.step()
        master_param_to_model_param(model_params, master_params)
        # detach() before the accuracy computation so the graph is not retained.
        accuracy = get_binary_accuracy(predict.detach().float().cpu(), label)
        writer.add_scalar('Train/loss_1_GPU_TC', loss.item(), total_updates)
        writer.add_scalar('Train/accuracy_1_GPU_TC', accuracy, total_updates)
        total_updates = total_updates + 1
In [ ]:
# BUG FIX: the trained network is bound to `model` (cell 12); `resnet` was never
# defined and this cell raised a NameError.
state = model.state_dict()
for each_layer in state:
    print(each_layer, "\t", state[each_layer].shape)