In [1]:
import os, glob, platform, datetime, random
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.utils.data as data_utils
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
# import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

import cv2
from PIL import Image
from tensorboardX import SummaryWriter

import numpy as np
from numpy.linalg import inv as denseinv
from scipy import sparse
from scipy.sparse import lil_matrix, csr_matrix
from scipy.sparse.linalg import spsolve
from scipy.sparse.linalg import inv as spinv
import scipy.misc

from myimagefolder import MyImageFolder
from mymodel import GradientNet
from myargs import Args

Configurations


In [2]:
args = Args()
args.test_scene = 'alley_1'
args.arch = "densenet121"
args.epoches = 500
args.epoches_unary_threshold = 0
args.image_h = 256
args.image_w = 256
args.img_extentions = ["png"]
args.training_thresholds = [250,200,150,50,0,300]
args.base_lr = 1
args.lr = args.base_lr
args.snapshot_interval = 5000
args.debug = True
args.gpu_num = 3
# growth_rate = (4*(2**(args.gpu_num)))
# transition_scale=(2*(2**(args.gpu_num+1)))
transition_scale=2
pretrained_scale=4
growth_rate = 32
args.display_interval = 50
args.display_curindex = 0

system_ = platform.system()
dist_ = platform.dist()  # (distname, version, id)
if system_ == "Darwin":
    args.train_dir = '/Volumes/Transcend/dataset/sintel2'
    args.pretrained = False
elif dist_ == ('debian', 'jessie/sid', ''):
    args.train_dir = '/home/lwp/workspace/sintel2'
    args.pretrained = True
elif dist_ == ('debian', 'stretch/sid', ''):
    args.train_dir = '/home/cad/lwp/workspace/dataset/sintel2'
    args.pretrained = True

use_gpu = (system_ == 'Linux')
if use_gpu:
    torch.cuda.set_device(args.gpu_num)
    

print(platform.dist())


('debian', 'jessie/sid', '')

My DataLoader


In [3]:
train_dataset = MyImageFolder(args.train_dir, 'train',
                       transforms.Compose(
        [transforms.ToTensor()]
    ), random_crop=True, 
    img_extentions=args.img_extentions, test_scene=args.test_scene, image_h=args.image_h, image_w=args.image_w)
test_dataset = MyImageFolder(args.train_dir, 'test', 
                       transforms.Compose(
        [transforms.CenterCrop((args.image_h, args.image_w)),
         transforms.ToTensor()]
    ), random_crop=False,
    img_extentions=args.img_extentions, test_scene=args.test_scene, image_h=args.image_h, image_w=args.image_w)

train_loader = data_utils.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=1)
test_loader = data_utils.DataLoader(test_dataset, batch_size=1, shuffle=True, num_workers=1)
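
Each batch unpacks as (input_img, gt_albedo, gt_shading, scene, img_path), the same five-tuple the training loop below uses. A minimal sanity check of the loaders (a sketch; shapes assume the 256x256 crops configured above):

input_img, gt_albedo, gt_shading, scene, img_path = next(iter(train_loader))
print(input_img.size(), gt_albedo.size(), gt_shading.size())  # each (1, 3, 256, 256)
print(scene[0], img_path[0])  # scene name and source file of this sample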

Load Pretrained Model

Definition

  • DenseNet-121: num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16)
    • First convolution + max-pool: 32M -> 16M -> 8M
    • Every transition layer: 8M -> 4M -> 2M (downsamples by 1/2; the final dense block keeps its resolution). The sketch after the next cell traces these sizes empirically.

In [4]:
densenet = models.__dict__[args.arch](pretrained=args.pretrained)

for param in densenet.parameters():
    param.requires_grad = False

if use_gpu: densenet.cuda()
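
The resolutions listed above can be checked by tracing a dummy batch through densenet.features module by module (a sketch; assumes torchvision's DenseNet layout, where features is an nn.Sequential of named conv/pool/denseblock/transition children):

# conv0 and pool0 each halve a 256x256 input; every transition layer halves it
# again, and the final dense block keeps its resolution.
x = Variable(torch.zeros(1, 3, 256, 256))
if use_gpu: x = x.cuda()
for name, module in densenet.features.named_children():
    x = module(x)
    print(name, tuple(x.size()))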

In [5]:
ss = 6  # epochs per curriculum stage; training_thresholds below are multiples of ss

args.display_curindex = 0
args.base_lr = 0.05
args.display_interval = 20
args.momentum = 0.9
args.epoches = 120
args.training_thresholds = [ss*4,ss*3,ss*2,ss*1,ss*0,ss*5]  # epoch at which each output scale starts training; index 5 is the merged, full-resolution output
args.power = 0.5



# pretrained = PreTrainedModel(densenet)
# if use_gpu: 
#     pretrained.cuda()


net = GradientNet(densenet=densenet, growth_rate=growth_rate, transition_scale=transition_scale, pretrained_scale=pretrained_scale)
if use_gpu:
    net.cuda()

if use_gpu: 
    mse_losses = [nn.MSELoss().cuda()] * 6
    test_losses = [nn.MSELoss().cuda()] * 6
else:
    mse_losses = [nn.MSELoss()] * 6
    test_losses = [nn.MSELoss()] * 6
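
One subtlety: [nn.MSELoss().cuda()] * 6 fills the list with six references to the same module. That is harmless here because MSELoss is stateless, but a list comprehension builds independent instances and is the safer pattern for stateful criteria (an equivalent sketch):

# One independent criterion per output scale.
mse_losses = [nn.MSELoss().cuda() if use_gpu else nn.MSELoss() for _ in range(6)]
test_losses = [nn.MSELoss().cuda() if use_gpu else nn.MSELoss() for _ in range(6)]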

In [6]:
# training loop
writer = SummaryWriter(comment='-pretrained_scale_{}'.format(pretrained_scale))

parameters = filter(lambda p: p.requires_grad, net.parameters())
optimizer = optim.SGD(parameters, lr=args.base_lr, momentum=args.momentum)

def adjust_learning_rate(optimizer, epoch, beg, end, reset_lr=None):
    """Poly decay: lr = base_lr * ((end - epoch) / (end - beg)) ** power,
    floored at 1e-8. If reset_lr is given, reset all groups to it instead."""
    for param_group in optimizer.param_groups:
        if reset_lr is not None:
            param_group['lr'] = reset_lr
            continue
        if epoch != 0:
            # linear variant:
            # param_group['lr'] *= (end-epoch) / (end-beg)
            # poly: base_lr * (1 - iter/max_iter) ** power
            param_group['lr'] = args.base_lr * (float(end-epoch)/(end-beg)) ** (args.power)
            if param_group['lr'] < 1.0e-8: param_group['lr'] = 1.0e-8
        print('lr', param_group['lr'])
        
# def findLargerInd(target, arr):
#     res = list(filter(lambda x: x>target, arr))
#     print('res',res)
#     if len(res) == 0: return -1
#     return res[0]

for epoch in range(args.epoches):
    net.train()
    print('epoch: {} [{}]'.format(epoch, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
    
    if epoch < args.training_thresholds[-1]: adjust_learning_rate(optimizer, epoch%ss, beg=0, end=ss-1)
    else: adjust_learning_rate(optimizer, epoch, beg=args.training_thresholds[-1], end=args.epoches-1)
    
    run_losses = [0] * len(args.training_thresholds)
    run_cnts   = [0.00001] * len(args.training_thresholds)
    if epoch in args.training_thresholds:
        adjust_learning_rate(optimizer, epoch, reset_lr=args.base_lr, beg=-1, end=-1)
    writer.add_scalar('learning rate', optimizer.param_groups[0]['lr'], global_step=epoch)
    for ind, data in enumerate(train_loader, 0):
#         if  ind == 1 : break
        input_img, gt_albedo, gt_shading, test_scene, img_path = data
        im = input_img[0].numpy().transpose(1, 2, 0)  # CHW -> HWC
        im = im[:, :, ::-1] * 255                     # RGB -> BGR for cv2
        
        if test_scene[0] == 'alley_1':
            print('alley_1 yes')  # debug: flags samples from the held-out test scene
        input_img = Variable(input_img)
        gt_albedo = Variable(gt_albedo)
        gt_shading = Variable(gt_shading)
        if use_gpu:
            input_img = input_img.cuda()
            gt_albedo = gt_albedo.cuda()
            gt_shading = gt_shading.cuda()
#         run_losses = [0] * len(mse_losses)
#         run_cnts = [0.00001] * len(mse_losses)
        if args.display_curindex % args.display_interval == 0:
            cv2.imwrite('snapshot{}/input.png'.format(args.gpu_num), im)

        optimizer.zero_grad()
#         pretrained.train(); ft_pretreained = pretrained(input_img)
        ft_predict = net(input_img)
        for i, threshold in enumerate(args.training_thresholds):
#             threshold = args.training_thresholds[i]
            if epoch >= threshold:
#             if epoch >= 0:
                if i == 5: s = 1
                else: s = (2**(i+1))
                gt = gt_albedo.cpu().data.numpy()
                n,c,h,w = gt.shape
                gt = gt[0,:,:,:]
                gt = gt.transpose((1,2,0))
                gt = cv2.resize(gt, (w//s, h//s))  # cv2.resize takes (width, height)
#                 gt = cv2.resize(gt, (h,w))
                if args.display_curindex % args.display_interval == 0:
                    cv2.imwrite('snapshot{}/gt-{}-{}.png'.format(args.gpu_num, epoch, i), gt[:,:,::-1]*255)
                gt = gt.transpose((2,0,1))
                gt = gt[np.newaxis, :]
                gt = Variable(torch.from_numpy(gt))
                if use_gpu: gt = gt.cuda()
                loss = mse_losses[i](ft_predict[i], gt)
                loss_data = loss.data.cpu().numpy()
                writer.add_scalar('{}th train iters loss'.format(i), loss_data, global_step=args.display_curindex)
                ma_ = ft_predict[i].max().cpu().data.numpy()
                mi_ = ft_predict[i].min().cpu().data.numpy()
                #print('mi', mi_, 'ma', ma_)
#                 writer.add_scalars('{}th train predict'.format(i), {'max': ma_, 'min': mi_}, global_step=args.display_curindex)
#                 run_cnts[i] += 1
                run_losses[i] += loss.data.cpu().numpy()[0]
                loss.backward(retain_graph=True)
                run_cnts[i] += 1
#                 print('i = ', i, '; weig\n', net.upsample01.weight[0,0,0:4,0:4].data.cpu().numpy())
#                 print('i = ', i, '; grad\n', net.upsample01.weight.grad[0,0,0:4,0:4].data.cpu().numpy())
                if args.display_curindex % args.display_interval == 0:
                    im = ft_predict[i].cpu().data.numpy()[0].transpose((1,2,0)) * 255
                    cv2.imwrite('snapshot{}/train-{}-{}.png'.format(args.gpu_num, epoch, i), im[:,:,::-1])
        optimizer.step()
        args.display_curindex += 1

    """ every epoch """
#     loss_output = 'ind: ' + str(args.display_curindex)
    loss_output = ''
    
    
    
    for i,v in enumerate(run_losses):
        if i == len(run_losses)-1: 
            loss_output += ' merged: %6f' % (run_losses[i] / run_cnts[i])
            continue
        loss_output += ' %2dM: %6f' % ((2**(4-i)), (run_losses[i] / run_cnts[i]))
    print(loss_output)
    # snapshot every 10 epochs
    if (epoch+1) % 10 == 0:
        torch.save({
            'epoch': epoch,
            'args' : args,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }, 'snapshot{}/snapshot-{}.pth.tar'.format(args.gpu_num, epoch))
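        # To resume later (a sketch; mirrors the keys saved above):
        #   ckpt = torch.load('snapshot{}/snapshot-{}.pth.tar'.format(args.gpu_num, epoch))
        #   net.load_state_dict(ckpt['state_dict'])
        #   optimizer.load_state_dict(ckpt['optimizer'])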
    
    # test 
    test_losses_trainphase = [0] * len(args.training_thresholds)
    test_cnts_trainphase   = [0.00001] * len(args.training_thresholds)   
    for ind, data in enumerate(test_loader, 0):
        input_img, gt_albedo, gt_shading, test_scene, img_path = data
        input_img = Variable(input_img)
        gt_albedo = Variable(gt_albedo)
        gt_shading = Variable(gt_shading)
        if use_gpu:
            input_img = input_img.cuda(args.gpu_num)
        
#         pretrained.train(); ft_pretreained = pretrained(input_img)
        ft_test = net(input_img)
            
        for i,v in enumerate(ft_test):
            if epoch < args.training_thresholds[i]: continue
            if i == 5: s = 1
            else: s = (2**(i+1))
            gt = gt_albedo.data.numpy()
            n,c,h,w = gt.shape
            gt = gt[0,:,:,:]
            gt = gt.transpose((1,2,0))
            gt = cv2.resize(gt, (w//s, h//s))  # cv2.resize takes (width, height)
#             gt = cv2.resize(gt, (h,w))
            
            gt = gt.transpose((2,0,1))
            gt = gt[np.newaxis, :]
            gt = Variable(torch.from_numpy(gt))
            if use_gpu: gt = gt.cuda()

            loss = mse_losses[i](ft_test[i], gt)
            
            test_losses_trainphase[i] += loss.data.cpu().numpy()[0]
            test_cnts_trainphase[i] += 1
            v = v[0].cpu().data.numpy()
            v = v.transpose(1,2,0)
            if ind == 0: cv2.imwrite('snapshot{}/test-phase_train-{}-{}.png'.format(args.gpu_num, epoch, i), v[:,:,::-1]*255)

    
    net.eval()
    test_losses = [0] * len(args.training_thresholds)   # accumulators; shadows the unused MSELoss list defined earlier
    test_cnts   = [0.00001] * len(args.training_thresholds)   
    for ind, data in enumerate(test_loader, 0):
#         if ind == 1: break
        input_img, gt_albedo, gt_shading, test_scene, img_path = data
        input_img = Variable(input_img)
        gt_albedo = Variable(gt_albedo)
        gt_shading = Variable(gt_shading)
        if use_gpu:
            input_img = input_img.cuda(args.gpu_num)
            
#         pretrained.eval(); ft_pretreained = pretrained(input_img)
        ft_test = net(input_img)
            
        for i,v in enumerate(ft_test):
            if epoch < args.training_thresholds[i]: continue
            if i == 5: s = 1
            else: s = (2**(i+1))
            gt = gt_albedo.data.numpy()
            n,c,h,w = gt.shape
            gt = gt[0,:,:,:]
            gt = gt.transpose((1,2,0))
            gt = cv2.resize(gt, (w//s, h//s))  # cv2.resize takes (width, height)
#             gt = cv2.resize(gt, (h,w))
            
            gt = gt.transpose((2,0,1))
            gt = gt[np.newaxis, :]
            gt = Variable(torch.from_numpy(gt))
            if use_gpu: gt = gt.cuda()

            loss = mse_losses[i](ft_test[i], gt)
            
            test_losses[i] += loss.data.cpu().numpy()[0]
            test_cnts[i] += 1
            v = v[0].cpu().data.numpy()
            v = v.transpose(1,2,0)
            if ind == 0: cv2.imwrite('snapshot{}/test-phase_test-{}-{}.png'.format(args.gpu_num, epoch, i), v[:,:,::-1]*255)
    
    # Per-scale train/test curves; tag names match the original hand-written calls.
    scale_names = ['16M', '8M', '4M', '2M', '1M', 'merged']
    for i, name in enumerate(scale_names):
        writer.add_scalars('{} loss'.format(name), {
            'train {} '.format(name): np.array([run_losses[i] / run_cnts[i]]),
            'test_trainphase {} '.format(name): np.array([test_losses_trainphase[i] / test_cnts_trainphase[i]]),
            'test {} '.format(name): np.array([test_losses[i] / test_cnts[i]]),
        }, global_step=epoch)


epoch: 0 [2017-11-23 15:43:54]
lr 0.05
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.057529 merged: 0.000000
epoch: 1 [2017-11-23 15:46:02]
lr 0.044721359549995794
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.052053 merged: 0.000000
epoch: 2 [2017-11-23 15:48:10]
lr 0.038729833462074176
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.047733 merged: 0.000000
epoch: 3 [2017-11-23 15:50:17]
lr 0.0316227766016838
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.044509 merged: 0.000000
epoch: 4 [2017-11-23 15:52:24]
lr 0.022360679774997897
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.038485 merged: 0.000000
epoch: 5 [2017-11-23 15:54:30]
lr 1e-08
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.000000  1M: 0.033386 merged: 0.000000
epoch: 6 [2017-11-23 15:56:38]
lr 1e-08
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.052660  1M: 0.041762 merged: 0.000000
epoch: 7 [2017-11-23 15:59:08]
lr 0.044721359549995794
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.050439  1M: 0.037072 merged: 0.000000
epoch: 8 [2017-11-23 16:02:08]
lr 0.038729833462074176
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.046301  1M: 0.032468 merged: 0.000000
epoch: 9 [2017-11-23 16:04:56]
lr 0.0316227766016838
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.041922  1M: 0.027672 merged: 0.000000
epoch: 10 [2017-11-23 16:07:31]
lr 0.022360679774997897
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.035036  1M: 0.025137 merged: 0.000000
epoch: 11 [2017-11-23 16:10:04]
lr 1e-08
 16M: 0.000000  8M: 0.000000  4M: 0.000000  2M: 0.029670  1M: 0.024691 merged: 0.000000
epoch: 12 [2017-11-23 16:12:47]
lr 1e-08
 16M: 0.000000  8M: 0.000000  4M: 0.049411  2M: 0.042442  1M: 0.030162 merged: 0.000000
epoch: 13 [2017-11-23 16:15:49]
lr 0.044721359549995794
 16M: 0.000000  8M: 0.000000  4M: 0.040240  2M: 0.035206  1M: 0.027575 merged: 0.000000
epoch: 14 [2017-11-23 16:18:32]
lr 0.038729833462074176
 16M: 0.000000  8M: 0.000000  4M: 0.033164  2M: 0.029420  1M: 0.025735 merged: 0.000000
epoch: 15 [2017-11-23 16:21:19]
lr 0.0316227766016838
 16M: 0.000000  8M: 0.000000  4M: 0.026806  2M: 0.027602  1M: 0.024130 merged: 0.000000
epoch: 16 [2017-11-23 16:24:45]
lr 0.022360679774997897
 16M: 0.000000  8M: 0.000000  4M: 0.022783  2M: 0.023555  1M: 0.022533 merged: 0.000000
epoch: 17 [2017-11-23 16:28:14]
lr 1e-08
 16M: 0.000000  8M: 0.000000  4M: 0.020267  2M: 0.023194  1M: 0.021651 merged: 0.000000
epoch: 18 [2017-11-23 16:31:15]
lr 1e-08
 16M: 0.000000  8M: 0.047238  4M: 0.026660  2M: 0.027711  1M: 0.025505 merged: 0.000000
epoch: 19 [2017-11-23 16:34:21]
lr 0.044721359549995794
 16M: 0.000000  8M: 0.036079  4M: 0.023929  2M: 0.025823  1M: 0.026577 merged: 0.000000
epoch: 20 [2017-11-23 16:37:22]
lr 0.038729833462074176
 16M: 0.000000  8M: 0.024981  4M: 0.020728  2M: 0.022587  1M: 0.023673 merged: 0.000000
epoch: 21 [2017-11-23 16:40:24]
lr 0.0316227766016838
 16M: 0.000000  8M: 0.021695  4M: 0.017605  2M: 0.019461  1M: 0.020909 merged: 0.000000
epoch: 22 [2017-11-23 16:43:37]
lr 0.022360679774997897
 16M: 0.000000  8M: 0.018775  4M: 0.015763  2M: 0.018946  1M: 0.020876 merged: 0.000000
epoch: 23 [2017-11-23 16:46:34]
lr 1e-08
 16M: 0.000000  8M: 0.018228  4M: 0.015985  2M: 0.018831  1M: 0.019734 merged: 0.000000
epoch: 24 [2017-11-23 16:49:33]
lr 1e-08
 16M: 0.042900  8M: 0.022284  4M: 0.019680  2M: 0.022466  1M: 0.022870 merged: 0.000000
epoch: 25 [2017-11-23 16:52:44]
lr 0.044721359549995794
 16M: 0.031403  8M: 0.019927  4M: 0.017176  2M: 0.023608  1M: 0.023303 merged: 0.000000
epoch: 26 [2017-11-23 16:55:57]
lr 0.038729833462074176
 16M: 0.024404  8M: 0.017497  4M: 0.015574  2M: 0.020273  1M: 0.021179 merged: 0.000000
epoch: 27 [2017-11-23 16:59:08]
lr 0.0316227766016838
 16M: 0.020854  8M: 0.016100  4M: 0.014514  2M: 0.019134  1M: 0.020583 merged: 0.000000
epoch: 28 [2017-11-23 17:02:21]
lr 0.022360679774997897
 16M: 0.017761  8M: 0.014226  4M: 0.013097  2M: 0.016901  1M: 0.018803 merged: 0.000000
epoch: 29 [2017-11-23 17:05:35]
lr 1e-08
 16M: 0.018023  8M: 0.014519  4M: 0.012807  2M: 0.016458  1M: 0.019170 merged: 0.000000
epoch: 30 [2017-11-23 17:08:50]
lr 0.05
 16M: 0.022868  8M: 0.022000  4M: 0.017339  2M: 0.020371  1M: 0.024010 merged: 0.035861
epoch: 31 [2017-11-23 17:13:47]
lr 0.04971830761761256
 16M: 0.022604  8M: 0.022733  4M: 0.018687  2M: 0.021476  1M: 0.022745 merged: 0.025234
epoch: 32 [2017-11-23 17:18:56]
lr 0.04943501011144937
 16M: 0.021031  8M: 0.019526  4M: 0.017373  2M: 0.020456  1M: 0.022177 merged: 0.021963
epoch: 33 [2017-11-23 17:24:06]
lr 0.04915007972606608
 16M: 0.019810  8M: 0.018614  4M: 0.016391  2M: 0.020616  1M: 0.022471 merged: 0.020040
epoch: 34 [2017-11-23 17:29:16]
lr 0.04886348789677424
 16M: 0.017681  8M: 0.016625  4M: 0.014983  2M: 0.018017  1M: 0.021331 merged: 0.017452
epoch: 35 [2017-11-23 17:34:27]
lr 0.04857520521621862
 16M: 0.016507  8M: 0.016445  4M: 0.015553  2M: 0.017710  1M: 0.020075 merged: 0.016852
epoch: 36 [2017-11-23 17:39:40]
lr 0.04828520139915856
 16M: 0.016738  8M: 0.016614  4M: 0.014675  2M: 0.017438  1M: 0.019103 merged: 0.016523
epoch: 37 [2017-11-23 17:44:51]
lr 0.047993445245333805
 16M: 0.015796  8M: 0.016929  4M: 0.014143  2M: 0.016568  1M: 0.019933 merged: 0.015665
epoch: 38 [2017-11-23 17:50:03]
lr 0.0476999046002862
 16M: 0.015200  8M: 0.016161  4M: 0.012921  2M: 0.015584  1M: 0.018465 merged: 0.014890
epoch: 39 [2017-11-23 17:55:11]
lr 0.04740454631399772
 16M: 0.015028  8M: 0.015367  4M: 0.013945  2M: 0.015838  1M: 0.018709 merged: 0.015163
epoch: 40 [2017-11-23 18:00:23]
lr 0.04710733619719444
 16M: 0.014390  8M: 0.014877  4M: 0.013382  2M: 0.015524  1M: 0.018772 merged: 0.013994
epoch: 41 [2017-11-23 18:05:34]
lr 0.04680823897515326
 16M: 0.013029  8M: 0.013763  4M: 0.012212  2M: 0.014315  1M: 0.017482 merged: 0.013051
epoch: 42 [2017-11-23 18:10:44]
lr 0.04650721823883479
 16M: 0.013787  8M: 0.014112  4M: 0.012355  2M: 0.014678  1M: 0.018127 merged: 0.013009
epoch: 43 [2017-11-23 18:15:54]
lr 0.046204236393150765
 16M: 0.013288  8M: 0.013789  4M: 0.012527  2M: 0.015176  1M: 0.018391 merged: 0.012761
epoch: 44 [2017-11-23 18:21:05]
lr 0.045899254602157845
 16M: 0.012557  8M: 0.012827  4M: 0.011943  2M: 0.015878  1M: 0.017626 merged: 0.012073
epoch: 45 [2017-11-23 18:26:16]
lr 0.04559223273095164
 16M: 0.012608  8M: 0.012716  4M: 0.011517  2M: 0.014587  1M: 0.017417 merged: 0.012257
epoch: 46 [2017-11-23 18:31:28]
lr 0.045283129284014914
 16M: 0.013142  8M: 0.012630  4M: 0.012355  2M: 0.014734  1M: 0.017555 merged: 0.012003
epoch: 47 [2017-11-23 18:36:36]
lr 0.04497190133975169
 16M: 0.011955  8M: 0.012775  4M: 0.011551  2M: 0.013994  1M: 0.017096 merged: 0.011469
epoch: 48 [2017-11-23 18:41:47]
lr 0.04465850448091506
 16M: 0.011482  8M: 0.011843  4M: 0.011256  2M: 0.013310  1M: 0.016649 merged: 0.011009
epoch: 49 [2017-11-23 18:46:58]
lr 0.044342892720609255
 16M: 0.011265  8M: 0.011793  4M: 0.011177  2M: 0.013884  1M: 0.016859 merged: 0.010755
epoch: 50 [2017-11-23 18:52:08]
lr 0.044025018423517
 16M: 0.011383  8M: 0.011811  4M: 0.010549  2M: 0.014723  1M: 0.016572 merged: 0.010640
epoch: 51 [2017-11-23 18:57:17]
lr 0.04370483222197017
 16M: 0.011692  8M: 0.012029  4M: 0.010874  2M: 0.014425  1M: 0.016516 merged: 0.010671
epoch: 52 [2017-11-23 19:02:29]
lr 0.043382282926444894
 16M: 0.011165  8M: 0.011122  4M: 0.010002  2M: 0.013397  1M: 0.015719 merged: 0.010046
epoch: 53 [2017-11-23 19:07:40]
lr 0.04305731743002185
 16M: 0.011283  8M: 0.011163  4M: 0.010006  2M: 0.013578  1M: 0.016116 merged: 0.010307
epoch: 54 [2017-11-23 19:13:06]
lr 0.04272988060630656
 16M: 0.010535  8M: 0.010700  4M: 0.009819  2M: 0.012870  1M: 0.016240 merged: 0.009513
epoch: 55 [2017-11-23 19:18:37]
lr 0.04239991520025441
 16M: 0.010293  8M: 0.010418  4M: 0.009553  2M: 0.012348  1M: 0.015533 merged: 0.009165
epoch: 56 [2017-11-23 19:24:09]
lr 0.0420673617112877
 16M: 0.010109  8M: 0.010140  4M: 0.009116  2M: 0.012568  1M: 0.015862 merged: 0.009150
epoch: 57 [2017-11-23 19:29:39]
lr 0.041732158268029534
 16M: 0.009993  8M: 0.010111  4M: 0.009303  2M: 0.012421  1M: 0.015503 merged: 0.008965
epoch: 58 [2017-11-23 19:35:12]
lr 0.041394240493907074
 16M: 0.009499  8M: 0.009896  4M: 0.009043  2M: 0.012145  1M: 0.015190 merged: 0.008711
epoch: 59 [2017-11-23 19:40:43]
lr 0.041053541362798006
 16M: 0.009537  8M: 0.009896  4M: 0.009136  2M: 0.012512  1M: 0.015569 merged: 0.008738
epoch: 60 [2017-11-23 19:46:14]
lr 0.04070999104380296
 16M: 0.009603  8M: 0.009982  4M: 0.009000  2M: 0.012128  1M: 0.015766 merged: 0.008674
epoch: 61 [2017-11-23 19:51:43]
lr 0.04036351673412598
 16M: 0.009977  8M: 0.010122  4M: 0.009281  2M: 0.012157  1M: 0.015210 merged: 0.008935
epoch: 62 [2017-11-23 19:57:14]
lr 0.04001404247893005
 16M: 0.009393  8M: 0.009777  4M: 0.008825  2M: 0.011753  1M: 0.015355 merged: 0.008367
epoch: 63 [2017-11-23 20:02:44]
lr 0.03966148897690515
 16M: 0.009161  8M: 0.009564  4M: 0.008530  2M: 0.011601  1M: 0.015045 merged: 0.008183
epoch: 64 [2017-11-23 20:08:13]
lr 0.03930577337013889
 16M: 0.008812  8M: 0.009290  4M: 0.008442  2M: 0.011383  1M: 0.014810 merged: 0.007977
epoch: 65 [2017-11-23 20:13:44]
lr 0.038946809016712394
 16M: 0.008878  8M: 0.009216  4M: 0.008462  2M: 0.011321  1M: 0.014739 merged: 0.007848
epoch: 66 [2017-11-23 20:19:13]
lr 0.03858450524425343
 16M: 0.008581  8M: 0.009077  4M: 0.008197  2M: 0.011027  1M: 0.014407 merged: 0.007745
epoch: 67 [2017-11-23 20:24:44]
lr 0.03821876708246056
 16M: 0.008340  8M: 0.008750  4M: 0.008012  2M: 0.011278  1M: 0.014469 merged: 0.007490
epoch: 68 [2017-11-23 20:30:14]
lr 0.03784949497236286
 16M: 0.008798  8M: 0.009118  4M: 0.008236  2M: 0.011350  1M: 0.014636 merged: 0.007879
epoch: 69 [2017-11-23 20:35:45]
lr 0.03747658444979307
 16M: 0.008355  8M: 0.008611  4M: 0.007987  2M: 0.010985  1M: 0.014474 merged: 0.007421
epoch: 70 [2017-11-23 20:41:14]
lr 0.0370999258002226
 16M: 0.009986  8M: 0.009059  4M: 0.008087  2M: 0.010877  1M: 0.014205 merged: 0.008135
epoch: 71 [2017-11-23 20:46:44]
lr 0.03671940368172628
 16M: 0.009011  8M: 0.010389  4M: 0.008711  2M: 0.011151  1M: 0.014612 merged: 0.008282
epoch: 72 [2017-11-23 20:52:12]
lr 0.03633489671240478
 16M: 0.008661  8M: 0.009530  4M: 0.008196  2M: 0.010868  1M: 0.014212 merged: 0.007759
epoch: 73 [2017-11-23 20:57:40]
lr 0.03594627701808178
 16M: 0.008116  8M: 0.008763  4M: 0.007732  2M: 0.010563  1M: 0.013721 merged: 0.007283
epoch: 74 [2017-11-23 21:03:09]
lr 0.035553409735498295
 16M: 0.007874  8M: 0.008501  4M: 0.007748  2M: 0.010780  1M: 0.013769 merged: 0.007070
epoch: 75 [2017-11-23 21:08:38]
lr 0.03515615246553262
 16M: 0.008911  8M: 0.009414  4M: 0.008152  2M: 0.011163  1M: 0.014159 merged: 0.008066
epoch: 76 [2017-11-23 21:14:07]
lr 0.03475435467016077
 16M: 0.008237  8M: 0.008750  4M: 0.007804  2M: 0.011255  1M: 0.014265 merged: 0.007364
epoch: 77 [2017-11-23 21:19:34]
lr 0.034347857005916346
 16M: 0.007930  8M: 0.008453  4M: 0.007734  2M: 0.010667  1M: 0.013982 merged: 0.007036
epoch: 78 [2017-11-23 21:24:59]
lr 0.0339364905854808
 16M: 0.007845  8M: 0.008258  4M: 0.007609  2M: 0.010933  1M: 0.014286 merged: 0.006898
epoch: 79 [2017-11-23 21:30:26]
lr 0.03352007615769955
 16M: 0.007917  8M: 0.008248  4M: 0.007614  2M: 0.010551  1M: 0.014096 merged: 0.007014
epoch: 80 [2017-11-23 21:35:52]
lr 0.03309842319473132
 16M: 0.007855  8M: 0.008164  4M: 0.007562  2M: 0.010581  1M: 0.013688 merged: 0.006881
epoch: 81 [2017-11-23 21:41:18]
lr 0.03267132887314317
 16M: 0.008005  8M: 0.008115  4M: 0.008120  2M: 0.010602  1M: 0.014096 merged: 0.007187
epoch: 82 [2017-11-23 21:46:46]
lr 0.03223857693349118
 16M: 0.007870  8M: 0.008094  4M: 0.007843  2M: 0.010590  1M: 0.013892 merged: 0.006835
epoch: 83 [2017-11-23 21:52:12]
lr 0.0317999364001908
 16M: 0.007525  8M: 0.007908  4M: 0.007426  2M: 0.011133  1M: 0.014104 merged: 0.006713
epoch: 84 [2017-11-23 21:57:37]
lr 0.031355160140170396
 16M: 0.007569  8M: 0.007786  4M: 0.007255  2M: 0.010367  1M: 0.013363 merged: 0.006599
epoch: 85 [2017-11-23 22:03:03]
lr 0.03090398323477543
 16M: 0.007491  8M: 0.007936  4M: 0.007239  2M: 0.012398  1M: 0.013354 merged: 0.006629
epoch: 86 [2017-11-23 22:08:39]
lr 0.030446121134470178
 16M: 0.007505  8M: 0.007898  4M: 0.007264  2M: 0.011187  1M: 0.013620 merged: 0.006584
epoch: 87 [2017-11-23 22:13:58]
lr 0.02998126755983446
 16M: 0.007424  8M: 0.007758  4M: 0.007256  2M: 0.010742  1M: 0.013346 merged: 0.006449
epoch: 88 [2017-11-23 22:19:46]
lr 0.029509092104873926
 16M: 0.007204  8M: 0.007525  4M: 0.007055  2M: 0.010578  1M: 0.013270 merged: 0.006289
epoch: 89 [2017-11-23 22:25:15]
lr 0.029029237489356888
 16M: 0.007809  8M: 0.007911  4M: 0.007236  2M: 0.010552  1M: 0.013333 merged: 0.006710
epoch: 90 [2017-11-23 22:30:43]
lr 0.028541316395237167
 16M: 0.007328  8M: 0.007439  4M: 0.006980  2M: 0.010294  1M: 0.013264 merged: 0.006326
epoch: 91 [2017-11-23 22:36:12]
lr 0.028044907807525134
 16M: 0.007217  8M: 0.007531  4M: 0.007042  2M: 0.010861  1M: 0.013415 merged: 0.006340
epoch: 92 [2017-11-23 22:42:08]
lr 0.027539552761294706
 16M: 0.007116  8M: 0.007329  4M: 0.006855  2M: 0.010319  1M: 0.013064 merged: 0.006180
epoch: 93 [2017-11-23 22:47:36]
lr 0.027024749372597065
 16M: 0.006946  8M: 0.007394  4M: 0.006868  2M: 0.010244  1M: 0.012990 merged: 0.006099
epoch: 94 [2017-11-23 22:53:05]
lr 0.026499947000159004
 16M: 0.007324  8M: 0.007675  4M: 0.007085  2M: 0.010337  1M: 0.013587 merged: 0.006440
epoch: 95 [2017-11-23 22:58:35]
lr 0.02596453934447493
 16M: 0.006931  8M: 0.007394  4M: 0.006797  2M: 0.009870  1M: 0.012910 merged: 0.006179
epoch: 96 [2017-11-23 23:04:04]
lr 0.025417856237895775
 16M: 0.006912  8M: 0.007295  4M: 0.006809  2M: 0.009900  1M: 0.013311 merged: 0.006007
epoch: 97 [2017-11-23 23:09:31]
lr 0.02485915380880628
 16M: 0.006924  8M: 0.007132  4M: 0.006716  2M: 0.009860  1M: 0.012932 merged: 0.005838
epoch: 98 [2017-11-23 23:14:58]
lr 0.02428760260810931
 16M: 0.006658  8M: 0.006927  4M: 0.006499  2M: 0.009529  1M: 0.012825 merged: 0.005800
epoch: 99 [2017-11-23 23:20:27]
lr 0.02370227315699886
 16M: 0.006812  8M: 0.007136  4M: 0.006739  2M: 0.009923  1M: 0.013113 merged: 0.005914
epoch: 100 [2017-11-23 23:25:56]
lr 0.023102118196575382
 16M: 0.006817  8M: 0.007076  4M: 0.006812  2M: 0.009929  1M: 0.013181 merged: 0.005981
epoch: 101 [2017-11-23 23:31:24]
lr 0.022485950669875843
 16M: 0.006725  8M: 0.007144  4M: 0.006630  2M: 0.009669  1M: 0.012667 merged: 0.005832
epoch: 102 [2017-11-23 23:36:52]
lr 0.021852416110985085
 16M: 0.006482  8M: 0.006971  4M: 0.006546  2M: 0.009565  1M: 0.012909 merged: 0.005610
epoch: 103 [2017-11-23 23:42:23]
lr 0.021199957600127203
 16M: 0.006648  8M: 0.007026  4M: 0.006626  2M: 0.009598  1M: 0.012959 merged: 0.005697
epoch: 104 [2017-11-23 23:47:52]
lr 0.020526770681399003
 16M: 0.006526  8M: 0.006929  4M: 0.006607  2M: 0.009517  1M: 0.012723 merged: 0.005658
epoch: 105 [2017-11-23 23:53:25]
lr 0.019830744488452574
 16M: 0.006467  8M: 0.006852  4M: 0.006539  2M: 0.009604  1M: 0.012652 merged: 0.005638
epoch: 106 [2017-11-23 23:58:45]
lr 0.01910938354123028
 16M: 0.006632  8M: 0.006940  4M: 0.006541  2M: 0.009548  1M: 0.012591 merged: 0.005638
epoch: 107 [2017-11-24 00:04:20]
lr 0.01835970184086314
 16M: 0.006379  8M: 0.006690  4M: 0.006362  2M: 0.009203  1M: 0.012056 merged: 0.005523
epoch: 108 [2017-11-24 00:09:53]
lr 0.01757807623276631
 16M: 0.006335  8M: 0.006705  4M: 0.006440  2M: 0.009411  1M: 0.012206 merged: 0.005532
epoch: 109 [2017-11-24 00:15:27]
lr 0.016760038078849775
 16M: 0.006317  8M: 0.006546  4M: 0.006282  2M: 0.009211  1M: 0.012166 merged: 0.005460
epoch: 110 [2017-11-24 00:20:55]
lr 0.0158999682000954
 16M: 0.006179  8M: 0.006510  4M: 0.006246  2M: 0.009012  1M: 0.012147 merged: 0.005397
epoch: 111 [2017-11-24 00:26:15]
lr 0.01499063377991723
 16M: 0.006241  8M: 0.006531  4M: 0.006288  2M: 0.009170  1M: 0.012308 merged: 0.005377
epoch: 112 [2017-11-24 00:31:37]
lr 0.014022453903762567
 16M: 0.006141  8M: 0.006460  4M: 0.006098  2M: 0.008873  1M: 0.011901 merged: 0.005283
epoch: 113 [2017-11-24 00:36:56]
lr 0.012982269672237465
 16M: 0.006144  8M: 0.006443  4M: 0.006141  2M: 0.008988  1M: 0.012026 merged: 0.005310
epoch: 114 [2017-11-24 00:42:07]
lr 0.01185113657849943
 16M: 0.006250  8M: 0.006622  4M: 0.006385  2M: 0.009280  1M: 0.012732 merged: 0.005416
epoch: 115 [2017-11-24 00:47:20]
lr 0.010599978800063602
 16M: 0.006018  8M: 0.006301  4M: 0.006067  2M: 0.008857  1M: 0.012034 merged: 0.005194
epoch: 116 [2017-11-24 00:52:34]
lr 0.00917985092043157
 16M: 0.006267  8M: 0.006608  4M: 0.006253  2M: 0.009163  1M: 0.012380 merged: 0.005394
epoch: 117 [2017-11-24 00:57:47]
lr 0.007495316889958615
 16M: 0.005957  8M: 0.006325  4M: 0.006119  2M: 0.009010  1M: 0.012262 merged: 0.005183
epoch: 118 [2017-11-24 01:03:06]
lr 0.005299989400031801
 16M: 0.005933  8M: 0.006285  4M: 0.006095  2M: 0.008876  1M: 0.012189 merged: 0.005155
epoch: 119 [2017-11-24 01:08:24]
lr 1e-08
 16M: 0.005949  8M: 0.006343  4M: 0.006142  2M: 0.008976  1M: 0.012232 merged: 0.005150
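
The logged rates match the poly schedule in adjust_learning_rate exactly: epochs below training_thresholds[-1] = 30 cycle through a short phase (beg=0, end=ss-1=5) that resets whenever a new scale joins, and later epochs follow one long phase (beg=30, end=119). A standalone check (a sketch; the constants mirror base_lr=0.05 and power=0.5 above):

base_lr, power = 0.05, 0.5
print(base_lr * ((5 - 1) / (5 - 0.0)) ** power)             # epoch 1  -> 0.044721...
print(max(base_lr * ((5 - 5) / (5 - 0.0)) ** power, 1e-8))  # epoch 5  -> 1e-08
print(base_lr * ((119 - 31) / (119 - 30.0)) ** power)       # epoch 31 -> 0.049718...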

Visualize Graph


In [7]:
from graphviz import Digraph
import torch
from torch.autograd import Variable


def make_dot(var, params=None):
    """ Produces Graphviz representation of PyTorch autograd graph
    Blue nodes are the Variables that require grad, orange are Tensors
    saved for backward in torch.autograd.Function
    Args:
        var: output Variable
        params: dict of (name, Variable) to add names to node that
            require grad (TODO: make optional)
    """
    if params is not None:
        assert isinstance(list(params.values())[0], Variable)
        param_map = {id(v): k for k, v in params.items()}

    node_attr = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="10240,10240"), format='svg')
    seen = set()

    def size_to_str(size):
        return '('+(', ').join(['%d' % v for v in size])+')'

    def add_nodes(var):
        if var not in seen:
            if torch.is_tensor(var):
                dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange')
            elif hasattr(var, 'variable'):
                u = var.variable
                name = param_map[id(u)] if params is not None else ''
                node_name = '%s\n %s' % (name, size_to_str(u.size()))
                dot.node(str(id(var)), node_name, fillcolor='lightblue')
            else:
                dot.node(str(id(var)), str(type(var).__name__))
            seen.add(var)
            if hasattr(var, 'next_functions'):
                for u in var.next_functions:
                    if u[0] is not None:
                        dot.edge(str(id(u[0])), str(id(var)))
                        add_nodes(u[0])
            if hasattr(var, 'saved_tensors'):
                for t in var.saved_tensors:
                    dot.edge(str(id(t)), str(id(var)))
                    add_nodes(t)
    add_nodes(var.grad_fn)
    return dot

In [8]:
x = Variable(torch.zeros(1,3,256,256))
y = net(x.cuda())
g = make_dot(y[-1])

In [9]:
g.render('net-pretrained_scale_{}'.format(pretrained_scale))


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/graphviz/backend.py in render(engine, format, filepath, quiet)
    123         try:
--> 124             subprocess.check_call(args, startupinfo=STARTUPINFO, stderr=stderr)
    125         except OSError as e:

~/anaconda3/lib/python3.6/subprocess.py in check_call(*popenargs, **kwargs)
    285     """
--> 286     retcode = call(*popenargs, **kwargs)
    287     if retcode:

~/anaconda3/lib/python3.6/subprocess.py in call(timeout, *popenargs, **kwargs)
    266     """
--> 267     with Popen(*popenargs, **kwargs) as p:
    268         try:

~/anaconda3/lib/python3.6/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)
    708                                 errread, errwrite,
--> 709                                 restore_signals, start_new_session)
    710         except:

~/anaconda3/lib/python3.6/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
   1343                             err_msg += ': ' + repr(err_filename)
-> 1344                     raise child_exception_type(errno_num, err_msg, err_filename)
   1345                 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: 'dot': 'dot'

During handling of the above exception, another exception occurred:

ExecutableNotFound                        Traceback (most recent call last)
<ipython-input-9-a8aee31c1e27> in <module>()
----> 1 g.render('net-pretrained_scale_{}'.format(pretrained_scale))

~/anaconda3/lib/python3.6/site-packages/graphviz/files.py in render(self, filename, directory, view, cleanup)
    173         filepath = self.save(filename, directory)
    174 
--> 175         rendered = backend.render(self._engine, self._format, filepath)
    176 
    177         if cleanup:

~/anaconda3/lib/python3.6/site-packages/graphviz/backend.py in render(engine, format, filepath, quiet)
    125         except OSError as e:
    126             if e.errno == errno.ENOENT:
--> 127                 raise ExecutableNotFound(args)
    128             else:  # pragma: no cover
    129                 raise

ExecutableNotFound: failed to execute ['dot', '-Tsvg', '-O', 'net-pretrained_scale_4'], make sure the Graphviz executables are on your systems' PATH
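
The Python graphviz package only writes the DOT source; rendering shells out to the Graphviz dot executable, which is missing from PATH here. Installing the system Graphviz package (e.g. apt-get install graphviz on Debian) and re-running the cell should produce net-pretrained_scale_4.svg alongside the saved source.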

In [ ]: