In [ ]:
import data_science.j_utils as j_utils
# import data_science.lendingclub.dataprep_and_modeling.modeling_utils.data_prep_new as data_prep
# import dir_constants as dc
# from sklearn.externals import joblib
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.autograd import Variable
# import torch.nn.functional as F
# from torch.utils.data import Dataset, DataLoader
# import time
# from sklearn.metrics import mean_squared_error
from tqdm import tqdm_notebook
# import matplotlib.pyplot as plt
%matplotlib notebook
In [ ]:
%%writefile /home/justin/justin_tinkering/data_science/j_utils.py
# File with utility functions for models in pytorch
# Imports ___________________________________________________________________
import os
import re
import torch
import torchvision.datasets as datasets
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm_notebook
from torchvision.utils import make_grid
from torch.autograd import Variable
import pandas as pd #for some debugging
import shutil, errno
# copy function
def copyanything(src, dst):
try:
shutil.copytree(src, dst)
except OSError as exc: # python >2.5
if exc.errno == errno.ENOTDIR:
shutil.copy(src, dst)
else: raise
# Optimizers not put in master yet __________________________________________
class Nadam(torch.optim.Optimizer):
"""Implements Nadam algorithm (a variant of Adam based on Nesterov momentum).
It has been proposed in `Incorporating Nesterov Momentum into Adam`__.
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): learning rate (default: 2e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square
eps (float, optional): term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
schedule_decay (float, optional): momentum schedule decay (default: 4e-3)
__ http://cs229.stanford.edu/proj2015/054_report.pdf
__ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
"""
def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
weight_decay=0, schedule_decay=4e-3):
defaults = dict(lr=lr, betas=betas, eps=eps,
weight_decay=weight_decay, schedule_decay=schedule_decay)
super(Nadam, self).__init__(params, defaults)
def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
state = self.state[p]
# State initialization
if len(state) == 0:
state['step'] = 0
state['m_schedule'] = 1.
state['exp_avg'] = grad.new().resize_as_(grad).zero_()
state['exp_avg_sq'] = grad.new().resize_as_(grad).zero_()
# Warming momentum schedule
m_schedule = state['m_schedule']
schedule_decay = group['schedule_decay']
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
beta1, beta2 = group['betas']
eps = group['eps']
state['step'] += 1
if group['weight_decay'] != 0:
grad = grad.add(group['weight_decay'], p.data)
momentum_cache_t = beta1 * \
(1. - 0.5 * (0.96 ** (state['step'] * schedule_decay)))
momentum_cache_t_1 = beta1 * \
(1. - 0.5 *
(0.96 ** ((state['step'] + 1) * schedule_decay)))
m_schedule_new = m_schedule * momentum_cache_t
m_schedule_next = m_schedule * momentum_cache_t * momentum_cache_t_1
state['m_schedule'] = m_schedule_new
# Decay the first and second moment running average coefficient
bias_correction2 = 1 - beta2 ** state['step']
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
exp_avg_sq_prime = exp_avg_sq.div(1. - bias_correction2)
denom = exp_avg_sq_prime.sqrt_().add_(group['eps'])
p.data.addcdiv_(-group['lr'] * (1. - momentum_cache_t) /
(1. - m_schedule_new), grad, denom)
p.data.addcdiv_(-group['lr'] * momentum_cache_t_1 /
(1. - m_schedule_next), exp_avg, denom)
return loss
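# Example (hypothetical sketch, not executed on import): constructing Nadam for an
# arbitrary nn.Module. `model` and the lr value are caller-supplied placeholders.
def _example_nadam_usage(model, lr=2e-3):
    '''Returns a Nadam optimizer over the parameters of `model`.'''
    return Nadam(model.parameters(), lr=lr)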
# Visualizing Functions _____________________________________________________
def show_image(image):
'''Shows a PIL image'''
plt.figure()
plt.imshow(image)
plt.show()
def get_example_image(image_fullpaths):
'''From an array-like, choose one image'''
return Image.open(np.random.choice(image_fullpaths))
# Making Datasets ___________________________________________________________
class TestDataset(Dataset):
"""Args: path to dir, transforms; makes test dataset for images"""
def __init__(self, root_dir, transform=None):
self.root_dir = root_dir
self.transform = transform
self.samples = [filename for filename in os.listdir(root_dir) if '.jpg' in filename]
def __len__(self):
return len(self.samples)
def __getitem__(self, idx):
img_name = os.path.join(self.root_dir, self.samples[idx])
image = Image.open(img_name)
if self.transform:
image = self.transform(image)
#image = Image.fromarray(image)
return image, int(re.findall(r'\d+', self.samples[idx])[0])
class DfNumpyDataset(Dataset):
'''Makes dataset from df or numpy array of data and targets'''
def __init__(self, data, targets):
self.data = data
self.targets = targets
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
return self.data[idx,:], self.targets[idx,:]
def get_dataset(data, targets):
return DfNumpyDataset(data, targets)
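# Example (sketch): wrapping a hypothetical feature matrix X and target column y
# (numpy arrays) in a DataLoader. Targets are reshaped to 2-D because __getitem__
# indexes them with [idx, :].
def _example_numpy_loader(X, y, batch_size=64):
    return DataLoader(get_dataset(X, y.reshape(-1, 1)), batch_size=batch_size, shuffle=True)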
def get_image_dataset(root, tsfm=None):
'''Makes dataset from images following structure of root/class/img.png'''
return datasets.ImageFolder(root, transform=tsfm)
def get_loader(dataset, use_cuda=True, batch_size=64, shuffle=False):
    '''Makes a batched iterator over a Dataset for training or evaluating a nn.
    Feed it a Dataset (e.g. one from get_image_dataset).'''
return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, pin_memory=use_cuda)
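# Example (sketch, hypothetical path): a typical image pipeline built from the two
# helpers above; the crop size and root directory are placeholders.
def _example_image_loader(train_root='/path/to/train', batch_size=64):
    import torchvision.transforms as transforms
    tsfm = transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
    return get_loader(get_image_dataset(train_root, tsfm=tsfm), batch_size=batch_size, shuffle=True)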
def precompute_vals(model, data_loader, test_set=False):
    '''Given a model (truncated at the layer whose activations are wanted) and a data
    loader, precomputes the outputs and returns (outputs, labels).'''
outputs_list = []
labels_list = []
for i, data in tqdm_notebook(enumerate(data_loader)):
# get inputs and labels
inputs, labels = data
# wrap in Variable
try:
inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
labels = labels.view(-1)
except AttributeError:
inputs = Variable(inputs.cuda())
# forward pass
outputs = model(inputs)
outputs_list.append(outputs)
labels_list.append(labels)
del inputs
del labels
outputs_ret = torch.cat(outputs_list)
try:
labels_ret = torch.cat(labels_list)
except TypeError:
labels_ret = [ids for sublist in labels_list for ids in sublist]
return outputs_ret, labels_ret
# if test_set==False:
# else:
# # for test sets
# outputs_list = []
# ids_list = []
# for i, data in tqdm_notebook(enumerate(data_loader)):
# # get inputs and ids
# inputs, ids = data
# # wrap in Variable
# inputs, ids = Variable(inputs.cuda()), ids
# # forward pass
# outputs = model(inputs)
# outputs_list.append(outputs)
# ids_list.extend(ids)
# del inputs
# del ids
# return torch.cat(outputs_list), ids_list
def save_precompute(X, y, path, model_name, filename):
precom_savedir = make_savedir(path, 'precom_'+model_name)
X_converted = X.data.cpu().numpy()
try:
y_converted = y.data.cpu().numpy().reshape(-1,1)
except AttributeError:
y_converted = np.array(y).reshape(-1,1)
torch.save((X_converted,y_converted), os.path.join(precom_savedir, filename))
print('Saved at {0}'.format(os.path.join(precom_savedir, filename)))
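# Example (sketch): precomputing activations from a frozen backbone for every batch in
# a loader and caching them to disk. `frozen_model`, `loader`, and the path are
# hypothetical placeholders.
def _example_precompute_and_save(frozen_model, loader, path='/path/to/experiments'):
    feats, labels = precompute_vals(frozen_model, loader)
    save_precompute(feats, labels, path, 'frozen_backbone', 'train_precomputed.pt')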
# Image Standardization _____________________________________________________
def get_mean_rgb(train_paths):
'''given an array-like of all paths to train, under structure of
root/class/img, will return mean rgb scaled to 0-1 (from 0-255)'''
return np.array([(np.array(Image.open(path)) / 255).mean(0).mean(0) for path in tqdm_notebook(train_paths)]).mean(0)
def get_std_dev_rgb(train_paths, mean_rgb):
'''given an array-like of all paths to train, under structure of
root/class/img, will return std_dev rgb scaled to 0-1 (from 0-255)'''
return np.array([(((np.array(Image.open(path)) / 255) - mean_rgb)**2).mean(0).mean(0) for path in tqdm_notebook(train_paths)]).mean(0)**.5
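# Example (sketch): feeding the channel statistics above into a torchvision Normalize
# transform. `train_paths` is a hypothetical list of image paths (e.g. built with
# get_image_fullpaths_clf, defined below).
def _example_normalize_transform(train_paths):
    import torchvision.transforms as transforms
    mean_rgb = get_mean_rgb(train_paths)
    std_rgb = get_std_dev_rgb(train_paths, mean_rgb)
    return transforms.Normalize(mean=list(mean_rgb), std=list(std_rgb))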
class UnNormalize(object):
'''To undo a Normalize transform.'''
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, tensor):
"""
Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
        Returns:
            Tensor: Un-normalized image.
"""
for t, m, s in zip(tensor, self.mean, self.std):
t.mul_(s).add_(m)
# The normalize code -> t.sub_(m).div_(s)
return tensor
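# Example (sketch): undoing normalization on one (C, H, W) tensor before display.
# `mean_rgb` and `std_rgb` are the same per-channel statistics used by the forward
# Normalize transform; the clone keeps the original tensor untouched.
def _example_unnormalize_and_show(img_tensor, mean_rgb, std_rgb):
    unnorm = UnNormalize(mean_rgb, std_rgb)
    show_iter(unnorm(img_tensor.clone()))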
# Dir Functions _____________________________________________________________
# different dir functions for classification tasks vs neural style transfer
def get_image_classes_clf(root):
'''Returns a list of classes when structure is root/class/...'''
return [file for file in os.listdir(root) if '.DS_Store' not in file]
def get_image_fullpaths_clf(root, img_fmt):
'''Returns list of full paths to samples in root assuming root/class/...'''
return [os.path.join(root, classes, path) for classes in get_image_classes_clf(root) for path in os.listdir(
os.path.join(root, classes)) if img_fmt in path]
def get_image_fullpaths_nst(root, img_fmt):
'''Returns list of full paths to samples in root assuming root/img.jpeg'''
return [os.path.join(root, file) for file in os.listdir(root) if img_fmt in file]
# Training Models ___________________________________________________________
def make_savedir(path, model_name):
    '''Makes a savedir to hold model checkpoints if it does not already exist; returns the savedir path.'''
savedir = os.path.join(path, 'save_{0}'.format(model_name))
if not os.path.isdir(savedir):
os.mkdir(savedir)
return savedir
def train_model(model, model_name, train_loader, valid_loader, optimizer, criterion, n_epochs, save_epoch, savedir, variance_pct_thrsh, patience_epoch, pct_change, decay_rate, continue_training=False, g_epoch=1, verbose=False, lr_scheduler=False, early_stop=False):
'''Basic setup for training models and saving every multiple of save_epoch.
This assumes the full model can fit on one gpu. If different parts are
    on different GPUs, device numbers will need to be set explicitly in the
    cuda calls based on the architecture of the model.
model = pytorch model
model_name = model name
train_loader = loader made from train dataset
valid_loader = loader made from valid dataset
optimizer = choice of optimizer
criterion = loss function
n_epochs = number of epochs to train for
    save_epoch = save a state dict every save_epoch epochs
    savedir = dir to save state dicts in
    variance_pct_thrsh = relative train/valid accuracy gap treated as overfitting
    patience_epoch = epochs to wait where change in loss is below pct_change
    before decaying learning rate
    pct_change = the percent change difference desired in loss
    decay_rate = float from 0-1 which will multiply the current learning rate
    continue_training = specifies whether training is continuing or starting fresh
    g_epoch = global training epoch number to start from; 1 if training fresh
    verbose = if True, print loss and accuracy stats every epoch
    lr_scheduler = if True, decay the learning rate once train accuracy is high
    early_stop = if True, checkpoint and stop early when the train/valid gap
    stays above variance_pct_thrsh
    returns g_epoch (plus history lists) to keep track of total training epochs
    '''
epoch_list = []
loss_list = []
train_accuracy_list = []
valid_accuracy_list = []
lr_list = []
    early_stopping_flag = 0
    decrease_lr = 0  # persists across epochs so the lr-decay schedule below works
fig = plt.figure()
ax1 = fig.add_subplot(131)
ax2 = fig.add_subplot(132)
ax3 = fig.add_subplot(133)
plt.ion()
fig.show()
fig.canvas.draw()
k = 0
if continue_training:
try:
g_epoch = g_epoch[0]
        except (TypeError, IndexError):  # g_epoch may already be a plain int
pass
epochs = range(g_epoch, g_epoch + n_epochs)
for epoch in tqdm_notebook(epochs):
# epoch stats for plotting
correct = 0
seen = 0
current_loss = 0.0
g_epoch += 1
for i, data in enumerate(train_loader):
# get inputs and labels
inputs, labels = data
# wrap in Variable
inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
labels = labels.view(-1)
# zero the gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
# import pdb; pdb.set_trace()
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# statistic updates
current_loss += loss.data[0]
seen += len(inputs)
# import pdb; pdb.set_trace()
correct += (labels == outputs.max(1)[1]).cpu().data.numpy().sum()
# Plotting ___________________________________________________________
epoch_list.append(epoch)
if not loss_list:
last_loss = 9999999
else:
last_loss = loss_list[-1]
valid_accuracy = check_accuracy(valid_loader, model)
try:
if valid_accuracy > highest_valid_acc:
save_best = True
highest_valid_acc = valid_accuracy
else:
save_best = False
        except NameError:
            # first epoch: no best validation accuracy recorded yet
            highest_valid_acc = 0
            save_best = False
train_accuracy = float(correct)/float(seen)
        if verbose:
            print('epoch: {0}, loss: {1}, train_acc: {2}, valid_acc: {3}'.format(
                epoch, current_loss, train_accuracy, valid_accuracy))
loss_list.append(current_loss)
train_accuracy_list.append(train_accuracy)
valid_accuracy_list.append(valid_accuracy)
lr_list.append(optimizer.param_groups[0]['lr'])
ax1.clear()
ax1.plot(epoch_list, loss_list)
ax2.clear()
ax2.plot(epoch_list, train_accuracy_list, 'ro', label='train')
ax2.plot(epoch_list, valid_accuracy_list, label='valid')
ax3.plot(epoch_list, lr_list)
plt.title("Epoch: {0}, loss left, accuracy middle, learning rate right".format(epoch))
ax2.legend(loc='best')
fig.canvas.draw()
# To decay learning rate _____________________________________________
        if lr_scheduler:
            if train_accuracy > .93:
                # decay once every patience_epoch epochs after accuracy crosses the threshold
                if decrease_lr % patience_epoch == 0:
                    optimizer.param_groups[0]['lr'] *= decay_rate
                decrease_lr += 1
# if abs((last_loss - current_loss) / last_loss) < pct_change:
# k += 1
# if k >= patience_epoch:
# k = 0
# optimizer.param_groups[0]['lr'] *= decay_rate
# else:
# k = 0
        # Conditionally save if variance (train/valid gap) is starting to grow; handles early stopping as well
if early_stop:
if (valid_accuracy < train_accuracy) & (((train_accuracy - valid_accuracy)/train_accuracy) > variance_pct_thrsh):
print("Epoch passing variance cutoff: {0}".format(epoch))
early_stopping_flag += 1
torch.save(model.state_dict(), os.path.join(
savedir, model_name + '_{0}'.format(epoch)))
if early_stopping_flag > patience_epoch*2:
print('Triggered early stopping flag')
break
else:
early_stopping_flag = 0
# Saving _____________________________________________________________
if (epoch) % save_epoch == 0:
torch.save(model.state_dict(), os.path.join(
savedir, model_name + '_{0}'.format(epoch)))
        if save_best:
            torch.save(model.state_dict(), os.path.join(
                savedir, model_name + '_best'))
# Free up cuda memory again ______________________________________________
del inputs
del labels
del loss
del optimizer
del criterion
return g_epoch, epoch_list, loss_list, train_accuracy_list, valid_accuracy_list
# Validation Functions ______________________________________________________
def get_classes_strings(classes, labels_ids):
# returns the classes in string format
return [classes[label_id] for label_id in labels_ids]
def get_prediction_classes_ids(predictions):
# returns the predictions in id format
predictions_ids = predictions.cpu().data.numpy().argmax(1)
return predictions_ids
def get_prediction_classes_strings(classes, predictions):
# returns the predictions in string format
return get_classes_strings(classes, get_prediction_classes_ids(predictions))
def show_iter(img):
plt.figure()
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')
plt.show()
def predictions_vs_actuals(iterator, model, rvrs_tsfm, classes):
model.eval()
    images, labels = next(iterator)  # Python 3: use next() on the DataLoader iterator
img_list = [rvrs_tsfm(img) for img in images]
labels_string = get_classes_strings(classes, labels.numpy())
show_iter(make_grid(img_list, padding=10))
    # display the predictions for the images above
predictions = model(Variable(images.cuda()))
predictions_string = get_prediction_classes_strings(classes, predictions)
print('Actuals: ', labels_string)
print('Predictions: ', predictions_string)
del predictions
del images
del labels
def make_predictions(data_loader, model):
model.eval()
pred_list = []
for i, data in enumerate(data_loader):
images, labels = data
labels=labels.view(-1)
predictions = list(model(Variable(images.cuda())).max(1)[1].cpu().data)
pred_list.extend(predictions)
return pred_list
def check_accuracy(data_loader, model):
model.eval()
correct = 0
seen = 0
total_len = len(data_loader)
for i, data in enumerate(data_loader):
images, labels = data
labels=labels.view(-1)
seen += len(images)
predictions = model(Variable(images.cuda()))
        # labels is a tensor, predictions is a Variable; pull prediction data out to numpy to compare
        correct += (labels.numpy() == predictions.max(1)[1].cpu().data.numpy()).sum()  # predictions.max(1)[1] returns indices of max preds
# import pdb; pdb.set_trace()
del images
del labels
del predictions
# print('Accuracy: {0}, Saw: {1}, Correct: {2}'.format(correct/seen, seen, correct))
return float(correct)/float(seen)
In [ ]:
import importlib; importlib.reload(j_utils)
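In [ ]:
# Hypothetical end-to-end sketch of how the utilities above are wired together.
# Every path, the model, and all hyperparameters are placeholders, not values from
# this notebook; uncomment and fill in to use.
# import torch.nn as nn
# import torchvision.transforms as transforms
# tsfm = transforms.Compose([transforms.CenterCrop(224), transforms.ToTensor()])
# model = ...  # any nn.Module, moved to the GPU with model.cuda()
# train_loader = j_utils.get_loader(j_utils.get_image_dataset('/path/to/train', tsfm=tsfm), shuffle=True)
# valid_loader = j_utils.get_loader(j_utils.get_image_dataset('/path/to/valid', tsfm=tsfm))
# optimizer = j_utils.Nadam(model.parameters(), lr=2e-3)
# criterion = nn.CrossEntropyLoss()
# savedir = j_utils.make_savedir('/path/to/experiments', 'my_model')
# g_epoch, epochs, losses, train_accs, valid_accs = j_utils.train_model(
#     model, 'my_model', train_loader, valid_loader, optimizer, criterion,
#     n_epochs=10, save_epoch=5, savedir=savedir, variance_pct_thrsh=0.1,
#     patience_epoch=3, pct_change=0.01, decay_rate=0.5)
# print(j_utils.check_accuracy(valid_loader, model))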
In [ ]:
1%10
In [ ]: