In [1]:
import torch
from torch import nn 
from torch.utils import data
import os
import torchvision
from torchvision import datasets, transforms, models
import torchvision.transforms.functional as tf
from PIL import Image
import numpy as np
import random
import matplotlib.pyplot as plt 
from tensorboardX import SummaryWriter
from tqdm import tqdm 
device = torch.device("cuda:1")
%matplotlib

torch.manual_seed(0)
torch.backends.cudnn.deterministic = True


Using matplotlib backend: TkAgg

In [2]:
class Dataset(data.Dataset):
    def __init__(self, list_images, labels, folder_name="./"):
        self.list_images = list_images
        self.labels = labels
        self.folder_name = folder_name

    def __len__(self):
        return len(self.list_images)

    def _load_image(self, image_file_name):
        # Force RGB so grayscale/RGBA files don't break the channel
        # transpose, then nearest-neighbour resize to ResNet's 224x224 input.
        img = Image.open(os.path.join(self.folder_name, image_file_name)).convert("RGB")
        img = img.resize((224, 224), resample=Image.NEAREST)
        # HWC uint8 -> CHW float32 in [0, 1]
        return torch.from_numpy(np.asarray(img, dtype=np.float32).transpose(2, 0, 1) / 255.0)

    def __getitem__(self, index):
        image = self._load_image(self.list_images[index])
        label = self.labels[index]
        return image, torch.tensor(label)

In [3]:
image_list = os.listdir("data/train/")
labels = []
for name in image_list:
    # One-hot labels: [1, 0] for dogs, [0, 1] for cats.
    if name.startswith('dog'):
        labels.append([1, 0])
    else:
        labels.append([0, 1])

In [4]:
training_set = Dataset(image_list, labels, folder_name="data/train/")

In [5]:
training_generator = data.DataLoader(training_set, batch_size=64, shuffle=True, num_workers=6)

In [6]:
for fn, label in training_generator:
    print(fn.shape, label.shape)
    break


torch.Size([64, 3, 224, 224]) torch.Size([64, 2])

In [7]:
fn.shape


Out[7]:
torch.Size([64, 3, 224, 224])

In [8]:
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride,
                     padding=1, bias=False)
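
A quick illustrative check (not from the original run): with kernel_size=3 and padding=1, the spatial size is preserved at stride 1, which is what lets BasicBlock add its shortcut without any cropping.

In [ ]:
print(conv3x3(8, 16)(torch.randn(1, 8, 32, 32)).shape)  # torch.Size([1, 16, 32, 32])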

In [9]:
class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.downsample is not None:
            residual = self.downsample(x)
            
        out += residual
        out = self.relu(out)
        
        return out
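
A minimal shape check for BasicBlock (an illustrative sketch, not part of the original run): with stride=2 and a matching 1x1-conv downsample on the shortcut, a 56x56 input comes out halved with the new channel count.

In [ ]:
block = BasicBlock(64, 128, stride=2,
                   downsample=nn.Sequential(
                       nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False),
                       nn.BatchNorm2d(128)))
print(block(torch.randn(1, 64, 56, 56)).shape)  # torch.Size([1, 128, 28, 28])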

In [10]:
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=2):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride=1):
        # A 1x1 conv on the shortcut whenever the spatial size or channel
        # count changes, so the residual addition lines up.
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes))
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for _ in range(1, num_blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)    # 224x224 -> 112x112
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)  # 112x112 -> 56x56

        x = self.layer1(x)   # 56x56
        x = self.layer2(x)   # 28x28
        x = self.layer3(x)   # 14x14
        x = self.layer4(x)   # 7x7

        x = self.avgpool(x)  # 7x7 -> 1x1
        x = x.view(x.size(0), -1)
        # BCEWithLogitsLoss applies the sigmoid itself, so return raw
        # logits rather than squashing them through a tanh first.
        return self.fc(x)

In [11]:
def resnet34(**kwargs):
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    return model
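
The [3, 4, 6, 3] block counts are the standard ResNet-34 layout. As a hedged sanity check (CPU, fp32, illustrative only): the network should map a 224x224 RGB batch to a (batch, 2) output and hold roughly 21M parameters.

In [ ]:
m = resnet34()
print(sum(p.numel() for p in m.parameters()))  # ~21.3M
print(m(torch.randn(2, 3, 224, 224)).shape)    # torch.Size([2, 2])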

In [12]:
model = resnet34().to(device).half()

In [13]:
def get_binary_accuracy(predicted, gold):
    predicted_argmax = torch.argmax(predicted, dim=1)
    gold_argmax = torch.argmax(gold, dim=1)
    correct = (predicted_argmax == gold_argmax)
    return correct.float().mean().item()
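
A toy check of get_binary_accuracy (illustrative): two of the three argmax predictions below match the one-hot labels, so the expected result is 2/3.

In [ ]:
pred = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
gold = torch.tensor([[1, 0], [0, 1], [0, 1]])
print(get_binary_accuracy(pred, gold))  # 0.666...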

In [14]:
def pre_param_list(model: torch.nn.Module):
    model_params = [p for p in model.parameters() if p.requires_grad]
    # fp32 master copies of the fp16 model weights. The optimizer steps on
    # these, so they are the ones that must require grad.
    master_params = [p.detach().clone().float() for p in model_params]
    for p in master_params:
        p.requires_grad = True

    return model_params, master_params

def master_param_to_model_param(model_params, master_params):
    for model, master in zip(model_params, master_params):
        model.data.copy_(master.data)

def model_grad_to_master_grad(model_params, master_params):
    for model, master in zip(model_params, master_params):
        if master.grad is None:
            master.grad = torch.empty_like(master.data)
        # Copy the fp16 gradient into the fp32 master gradient.
        master.grad.data.copy_(model.grad.data)

In [15]:
model_params, master_params = pre_param_list(model)
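
A quick illustrative check (not part of the original run): at this point every model parameter should be fp16 and every master copy fp32.

In [ ]:
print({p.dtype for p in model_params})   # {torch.float16}
print({p.dtype for p in master_params})  # {torch.float32}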

In [16]:
criteria = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(master_params, lr=0.0001, momentum=0.99)

In [17]:
def convert_BN_to_float(module):
    # Keep BatchNorm in fp32: its reductions and running statistics are
    # numerically fragile in fp16. Recurses through all submodules.
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        module.float()
    for child in module.children():
        convert_BN_to_float(child)
    return module

In [18]:
# convert_BN_to_float recurses on its own, so a single call on the root
# module covers every BatchNorm layer.
model = convert_BN_to_float(model)
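
A hedged spot-check of the mixed layout (illustrative): after the conversion, BatchNorm parameters should be back in fp32 while convolution weights stay fp16.

In [ ]:
print(model.bn1.weight.dtype, model.conv1.weight.dtype)
# torch.float32 torch.float16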

In [19]:
scale_factor = 128
writer = SummaryWriter()
total_updates = 0
for epoch_no in tqdm(range(50)):
    for fn, label in training_generator:
        # Zero the fp16 model grads as well: the optimizer only sees the
        # fp32 master copies, so optimizer.zero_grad() alone is not enough.
        model.zero_grad()
        optimizer.zero_grad()
        predict = model(fn.to(device).half())
        loss = criteria(predict.float(), label.float().to(device))
        # Scale the loss up so small fp16 gradients don't underflow...
        scaled_loss = loss * scale_factor
        scaled_loss.backward()
        model_grad_to_master_grad(model_params, master_params)
        # ...then unscale the fp32 master grads before the update.
        for p in master_params:
            p.grad.data.mul_(1. / scale_factor)

        optimizer.step()

        master_param_to_model_param(model_params, master_params)
        accuracy = get_binary_accuracy(predict.detach().float().cpu(), label)
        writer.add_scalar('Train/loss_1_GPU_TC', loss.item(), total_updates)
        writer.add_scalar('Train/accuracy_1_GPU_TC', accuracy, total_updates)
        total_updates += 1


 66%|██████▌   | 33/50 [15:34<07:59, 28.19s/it]
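
A hedged inference sketch (illustrative; assumes the partially trained model state is still loaded): switch to eval mode and spot-check accuracy on one training batch.

In [ ]:
model.eval()
with torch.no_grad():
    fn, label = next(iter(training_generator))
    predict = model(fn.to(device).half())
    print(get_binary_accuracy(predict.float().cpu(), label))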

In [ ]:
for each_layer in model.state_dict():
    print(each_layer, "\t", model.state_dict()[each_layer].shape)