In [1]:
import numpy as np
import math
import multiprocessing as mp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import namedtuple
from PIL import Image
import os
import os.path
import errno
import codecs
import copy
In [2]:
torch.manual_seed(0)
np.random.seed(0)
print("torch.cuda.device_count()", torch.cuda.device_count())
print("torch.cuda.current_device()", torch.cuda.current_device())
torch.cuda.set_device(2)
print("torch.cuda.current_device()", torch.cuda.current_device())
In [34]:
def compute_ranks(x):
"""
Returns ranks in [0, len(x))
Note: This is different from scipy.stats.rankdata, which returns ranks in [1, len(x)].
(https://github.com/openai/evolution-strategies-starter/blob/master/es_distributed/es.py)
"""
assert x.ndim == 1
ranks = np.empty(len(x), dtype=int)
ranks[x.argsort()] = np.arange(len(x))
return ranks
def compute_centered_ranks(x):
"""
https://github.com/openai/evolution-strategies-starter/blob/master/es_distributed/es.py
"""
y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
y /= (x.size - 1)
y -= .5
return y
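# Worked example (added note): compute_centered_ranks(np.array([3., -1., 7., 2.]))
# gives [0.1667, -0.5, 0.5, -0.1667]; raw rewards are replaced by their centered
# ranks in [-0.5, 0.5], which makes the ES update insensitive to reward scale.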
def compute_weight_decay(weight_decay, model_param_list):
model_param_grid = np.array(model_param_list)
return - weight_decay * np.mean(model_param_grid * model_param_grid, axis=1)
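# compute_weight_decay returns a per-solution penalty (a negative value): minus weight_decay
# times the mean squared parameter value of each candidate; it is added to the centered-rank
# reward in the training loop below.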
class CMAES:
'''CMA-ES wrapper.'''
def __init__(self, num_params, # number of model parameters
sigma_init=0.10, # initial standard deviation
popsize=255): # population size
self.num_params = num_params
self.sigma_init = sigma_init
self.popsize = popsize
self.solutions = None
import cma
self.es = cma.CMAEvolutionStrategy( self.num_params * [0],
self.sigma_init,
{'popsize': self.popsize})
def rms_stdev(self):
sigma = self.es.result()[6]
return np.mean(np.sqrt(sigma*sigma))
def ask(self):
'''returns a list of parameters'''
self.solutions = np.array(self.es.ask())
return self.solutions
def tell(self, reward_table_result):
reward_table = reward_table_result
self.es.tell(self.solutions, (-reward_table).tolist()) # convert minimizer to maximizer.
def done(self):
return self.es.stop()
def current_param(self):
return self.es.result()[5] # mean solution, presumably better with noise
def best_param(self):
return self.es.result()[0] # best evaluated solution
def result(self): # return best params so far, along with historically best reward, curr reward, sigma
r = self.es.result()
return (r[0], -r[1], -r[1], r[6])
class SimpleES:
'''Simple Evolution Strategies.'''
def __init__(self, num_params, # number of model parameters
sigma_init=0.10, # initial standard deviation
sigma_alpha=0.20, # learning rate for standard deviation
sigma_decay=0.999, # anneal standard deviation
sigma_limit=0.01, # stop annealing if less than this
popsize=255, # population size
elite_ratio=0.1, # percentage of the elites
done_threshold=1e-6, # threshold when we say we are done
average_baseline=True, # set baseline to average of batch
forget_best=True): # only use the best from latest generation
self.num_params = num_params
self.sigma_init = sigma_init
self.sigma_alpha = sigma_alpha
self.sigma_decay = sigma_decay
self.sigma_limit = sigma_limit
self.popsize = popsize
self.average_baseline = average_baseline
if self.average_baseline:
assert (self.popsize % 2 == 0), "Population size must be even"
self.batch_size = int(self.popsize / 2)
else:
assert (self.popsize & 1), "Population size must be odd"
self.batch_size = int((self.popsize - 1) / 2)
self.elite_ratio = elite_ratio
self.elite_popsize = int(self.popsize * self.elite_ratio)
self.forget_best = forget_best
self.batch_reward = np.zeros(self.batch_size * 2)
self.mu = np.zeros(self.num_params)
self.sigma = np.ones(self.num_params) * self.sigma_init
self.curr_best_mu = np.zeros(self.num_params)
self.best_mu = np.zeros(self.num_params)
self.best_reward = 0
self.first_iteration = True
self.done_threshold = done_threshold
def rms_stdev(self):
sigma = self.sigma
return np.mean(np.sqrt(sigma*sigma))
def ask(self):
'''returns a list of parameters'''
# antithetic sampling
self.epsilon = np.random.randn(self.batch_size, self.num_params) * self.sigma.reshape(1, self.num_params)
self.epsilon_full = np.concatenate([self.epsilon, - self.epsilon])
if self.average_baseline:
epsilon = self.epsilon_full
else:
# first population is mu, then positive epsilon, then negative epsilon
epsilon = np.concatenate([np.zeros((1, self.num_params)), self.epsilon_full])
solutions = self.mu.reshape(1, self.num_params) + epsilon
return solutions
def tell(self, reward_table_result):
# input must be a numpy float array
assert(len(reward_table_result) == self.popsize), "Inconsistent reward_table size reported."
reward_table = reward_table_result
reward_offset = 1
if self.average_baseline:
b = np.mean(reward_table)
reward_offset = 0
else:
b = reward_table[0] # baseline
reward = reward_table[reward_offset:]
idx = np.argsort(reward)[::-1][0:self.elite_popsize]
best_reward = reward[idx[0]]
if (best_reward > b or self.average_baseline):
best_mu = self.mu + self.epsilon_full[idx[0]]
best_reward = reward[idx[0]]
else:
best_mu = self.mu
best_reward = b
self.curr_best_reward = best_reward
self.curr_best_mu = best_mu
if self.first_iteration:
self.first_iteration = False
self.best_reward = self.curr_best_reward
self.best_mu = best_mu
else:
if self.forget_best or (self.curr_best_reward > self.best_reward):
self.best_mu = best_mu
self.best_reward = self.curr_best_reward
# adaptive sigma
# normalization
stdev_reward = reward.std()
epsilon = self.epsilon
sigma = self.sigma
S = ((epsilon * epsilon - (sigma * sigma).reshape(1, self.num_params)) / sigma.reshape(1, self.num_params))
reward_avg = (reward[:self.batch_size] + reward[self.batch_size:]) / 2.0
rS = reward_avg - b
delta_sigma = (np.dot(rS, S)) / (2 * self.batch_size * stdev_reward)
# move mean to the average of the best idx means
self.mu += self.epsilon_full[idx].mean(axis=0)
# adjust sigma according to the adaptive sigma calculation
change_sigma = self.sigma_alpha * delta_sigma
change_sigma = np.minimum(change_sigma, self.sigma)
change_sigma = np.maximum(change_sigma, - 0.5 * self.sigma)
self.sigma += change_sigma
self.sigma[self.sigma > self.sigma_limit] *= self.sigma_decay
def done(self):
return (self.rms_stdev() < self.done_threshold)
def current_param(self):
return self.curr_best_mu
def best_param(self):
return self.best_mu
def result(self): # return best params so far, along with historically best reward, curr reward, sigma
return (self.best_mu, self.best_reward, self.curr_best_reward, self.sigma)
class SimpleGA:
'''Simple Genetic Algorithm.'''
def __init__(self, num_params, # number of model parameters
sigma_init=0.1, # initial standard deviation
sigma_decay=0.999, # anneal standard deviation
sigma_limit=0.01, # stop annealing if less than this
popsize=255, # population size
elite_ratio=0.1, # percentage of the elites
forget_best=False, # forget the historical best elites
done_threshold=1e-6): # threshold when we say we are done
self.num_params = num_params
self.sigma_init = sigma_init
self.sigma_decay = sigma_decay
self.sigma_limit = sigma_limit
self.popsize = popsize
self.elite_ratio = elite_ratio
self.elite_popsize = int(self.popsize * self.elite_ratio)
self.sigma = self.sigma_init
self.elite_params = np.zeros((self.elite_popsize, self.num_params))
self.elite_rewards = np.zeros(self.elite_popsize)
self.best_params = np.zeros(self.num_params)  # named best_params so it does not shadow the best_param() method
self.best_reward = 0
self.first_iteration = True
self.forget_best = forget_best
self.done_threshold = done_threshold
def rms_stdev(self):
return self.sigma # same sigma for all parameters.
def ask(self):
'''returns a list of parameters'''
# gaussian mutation noise (same sigma for every parameter)
self.epsilon = np.random.randn(self.popsize, self.num_params) * self.sigma
solutions = []
def mate(a, b):
c = np.copy(a)
idx = np.where(np.random.rand((c.size)) > 0.5)
c[idx] = b[idx]
return c
elite_range = range(self.elite_popsize)
for i in range(self.popsize):
idx_a = np.random.choice(elite_range)
idx_b = np.random.choice(elite_range)
child_params = mate(self.elite_params[idx_a], self.elite_params[idx_b])
solutions.append(child_params + self.epsilon[i])
solutions = np.array(solutions)
self.solutions = solutions
return solutions
def tell(self, reward_table_result):
# input must be a numpy float array
assert(len(reward_table_result) == self.popsize), "Inconsistent reward_table size reported."
if (not self.forget_best or self.first_iteration):
reward = reward_table_result
solution = self.solutions
else:
reward = np.concatenate([reward_table_result, self.elite_rewards])
solution = np.concatenate([self.solutions, self.elite_params])
idx = np.argsort(reward)[::-1][0:self.elite_popsize]
self.elite_rewards = reward[idx]
self.elite_params = solution[idx]
self.curr_best_reward = self.elite_rewards[0]
if self.first_iteration or (self.curr_best_reward > self.best_reward):
self.first_iteration = False
self.best_reward = self.elite_rewards[0]
self.best_params = np.copy(self.elite_params[0])
if (self.sigma > self.sigma_limit):
self.sigma *= self.sigma_decay
def done(self):
return (self.rms_stdev() < self.done_threshold)
def current_param(self):
return self.elite_params[0]
def best_param(self):
return self.best_params
def result(self): # return best params so far, along with historically best reward, curr reward, sigma
return (self.best_params, self.best_reward, self.curr_best_reward, self.sigma)
class OpenES:
''' Basic Version of OpenAI Evolution Strategies.'''
def __init__(self, num_params, # number of model parameters
sigma_init=0.1, # initial standard deviation
sigma_decay=0.999, # anneal standard deviation
sigma_limit=0.01, # stop annealing if less than this
learning_rate=0.001, # learning rate for the mean (mu) update
learning_rate_decay = 0.9999, # annealing the learning rate
learning_rate_limit = 0.001, # stop annealing learning rate
popsize=255, # population size
antithetic=False, # whether to use antithetic sampling
forget_best=True): # forget historical best
self.num_params = num_params
self.sigma_decay = sigma_decay
self.sigma = sigma_init
self.sigma_limit = sigma_limit
self.learning_rate = learning_rate
self.learning_rate_decay = learning_rate_decay
self.learning_rate_limit = learning_rate_limit
self.popsize = popsize
self.antithetic = antithetic
if self.antithetic:
assert (self.popsize % 2 == 0), "Population size must be even"
self.half_popsize = int(self.popsize / 2)
self.reward = np.zeros(self.popsize)
self.mu = np.zeros(self.num_params)
self.best_mu = np.zeros(self.num_params)
self.best_reward = 0
self.first_iteration = True
self.forget_best = forget_best
def rms_stdev(self):
sigma = self.sigma
return np.mean(np.sqrt(sigma*sigma))
def ask(self):
'''returns a list of parameters'''
# antithetic sampling
if self.antithetic:
self.epsilon_half = np.random.randn(self.half_popsize, self.num_params)
self.epsilon = np.concatenate([self.epsilon_half, - self.epsilon_half])
else:
self.epsilon = np.random.randn(self.popsize, self.num_params)
self.solutions = self.mu.reshape(1, self.num_params) + self.epsilon * self.sigma
return self.solutions
def tell(self, reward):
# input must be a numpy float array
assert(len(reward) == self.popsize), "Inconsistent reward_table size reported."
idx = np.argsort(reward)[::-1]
best_reward = reward[idx[0]]
best_mu = self.solutions[idx[0]]
self.curr_best_reward = best_reward
self.curr_best_mu = best_mu
if self.first_iteration:
self.first_iteration = False
self.best_reward = self.curr_best_reward
self.best_mu = best_mu
else:
if self.forget_best or (self.curr_best_reward > self.best_reward):
self.best_mu = best_mu
self.best_reward = self.curr_best_reward
# main bit:
# standardize the rewards (zero mean, unit variance)
normalized_reward = (reward - np.mean(reward)) / np.std(reward)
self.mu += self.learning_rate/(self.popsize*self.sigma)*np.dot(self.epsilon.T, normalized_reward)
# anneal sigma and the learning rate
if (self.sigma > self.sigma_limit):
self.sigma *= self.sigma_decay
if (self.learning_rate > self.learning_rate_limit):
self.learning_rate *= self.learning_rate_decay
def done(self):
return False
def current_param(self):
return self.curr_best_mu
def best_param(self):
return self.best_mu
def result(self): # return best params so far, along with historically best reward, curr reward, sigma
return (self.best_mu, self.best_reward, self.curr_best_reward, self.sigma)
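In [ ]:
# Added sketch (not part of the original run): a minimal ask/tell loop on a toy
# quadratic, just to illustrate the solver interface the MNIST training loop uses below.
toy_es = OpenES(5, popsize=20, sigma_init=0.1, learning_rate=0.01)
toy_target = np.arange(5, dtype=np.float32)
for _ in range(100):
    toy_solutions = toy_es.ask()                                      # sample a population
    toy_rewards = -np.sum((toy_solutions - toy_target) ** 2, axis=1)  # higher is better
    toy_es.tell(toy_rewards)                                          # report fitness back
print(toy_es.result()[1])  # best reward; should climb toward 0 as mu approaches toy_target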
In [35]:
Args = namedtuple('Args', ['batch_size', 'test_batch_size', 'epochs', 'lr', 'cuda', 'seed', 'log_interval'])
In [36]:
args = Args(batch_size=100, test_batch_size=1000, epochs=30, lr=0.001, cuda=True, seed=0, log_interval=10)
In [37]:
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
In [38]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('MNIST_data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
batch_size=args.batch_size, shuffle=True, **kwargs)
valid_loader = train_loader  # validation reuses the (shuffled) training set; the held-out test set is only used at the end
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('MNIST_data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])),
batch_size=args.batch_size, shuffle=True, **kwargs)
In [39]:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.num_filter1 = 8
self.num_filter2 = 16
self.num_padding = 2
# input is 28x28
# padding=2 for same padding
self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
# feature map size is 14*14 by pooling
# padding=2 for same padding
self.conv2 = nn.Conv2d(self.num_filter1, self.num_filter2, 5, padding=self.num_padding)
# feature map size is 7*7 by pooling
self.fc = nn.Linear(self.num_filter2*7*7, 10)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), 2)
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = x.view(-1, self.num_filter2*7*7) # reshape Variable
x = self.fc(x)
return F.log_softmax(x, dim=1)
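In [ ]:
# Added sanity check (a minimal sketch, assumes the Net class above): run a dummy
# batch through an untrained CPU copy of the network to confirm the output shape.
_dummy = Variable(torch.zeros(2, 1, 28, 28))
print(Net()(_dummy).size())  # expected: torch.Size([2, 10])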
In [40]:
NPOPULATION = 101
weight_decay_coef = 0.1
In [41]:
'''
models = []
for i in range(NPOPULATION):
model = Net()
if args.cuda:
model.cuda()
model.eval()
models.append(model)
'''
model = Net()
if args.cuda:
model.cuda()
orig_model = copy.deepcopy(model)
In [42]:
# get init params
orig_params = []
model_shapes = []
for param in orig_model.parameters():
p = param.data.cpu().numpy()
model_shapes.append(p.shape)
orig_params.append(p.flatten())
orig_params_flat = np.concatenate(orig_params)
NPARAMS = len(orig_params_flat)
print(NPARAMS)
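# For the Net above, NPARAMS should be 11274:
# conv1 (1*8*5*5 + 8 = 208) + conv2 (8*16*5*5 + 16 = 3216) + fc (16*7*7*10 + 10 = 7850).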
In [43]:
def update_model(flat_param, model, model_shapes):
idx = 0
i = 0
for param in model.parameters():
delta = np.prod(model_shapes[i])
block = flat_param[idx:idx+delta]
block = np.reshape(block, model_shapes[i])
i += 1
idx += delta
block_data = torch.from_numpy(block).float()
if args.cuda:
block_data = block_data.cuda()
param.data = block_data
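In [ ]:
# Added round-trip check (a minimal sketch): writing the flat parameter vector back
# into the model and re-flattening it should reproduce the original values exactly.
update_model(orig_params_flat, model, model_shapes)
_roundtrip = np.concatenate([p.data.cpu().numpy().flatten() for p in model.parameters()])
print(np.allclose(_roundtrip, orig_params_flat))  # expected: True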
In [44]:
def evaluate(model, test_loader, print_mode=True, return_loss=False):
model.eval()
test_loss = 0
correct = 0
for data, target in test_loader:
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
acc = correct / len(test_loader.dataset)
if print_mode:
print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * acc))
if return_loss:
return test_loss
return acc
In [48]:
"""
es = SimpleES(NPARAMS,
popsize=NPOPULATION,
sigma_init=0.01,
sigma_decay=0.999,
sigma_alpha=0.2,
sigma_limit=0.001,
elite_ratio=0.1,
average_baseline=False,
forget_best=True
)
"""
es = SimpleGA(NPARAMS,
popsize=NPOPULATION,
forget_best=False,
sigma_init=0.01,
sigma_decay=0.9999,
sigma_limit=0.01
)
In [49]:
def worker(procnum, model, solution, data, target, send_end):
update_model(solution, model, model_shapes)
output = model(data)
loss = F.nll_loss(output, target)
reward = - loss.data[0]
send_end.send(reward)
def batch_simulation(model_list, solutions, data, target, process_count):
jobs = []
pipe_list = []
for i in range(process_count):
recv_end, send_end = mp.Pipe(False)
p = mp.Process(target=worker, args=(i, model_list[i], solutions[i], data, target, send_end))
jobs.append(p)
pipe_list.append(recv_end)
for p in jobs:
p.start()
for p in jobs:
p.join()
result_list = [x.recv() for x in pipe_list]
return np.array(result_list)
def batch_simulation_sequential(model_list, solutions, data, target, process_count):
result_list = []
for i in range(process_count):
update_model(solutions[i], model_list[i], model_shapes)
output = model_list[i](data)
loss = F.nll_loss(output, target)
reward = - loss.data[0]
result_list.append(reward)
return np.array(result_list)
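In [ ]:
# Added note: the parallel helpers above (worker / batch_simulation) are not used by the
# training loop below, which evaluates the whole population sequentially on one GPU.
# If they are used with CUDA tensors, the worker processes should be created with the
# 'spawn' start method, e.g. mp.set_start_method('spawn') once at startup (hedged suggestion).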
In [50]:
#'''
best_valid_acc = 0
training_log = []
for epoch in range(1, 10*args.epochs + 1):
# train loop
model.eval()
for batch_idx, (data, target) in enumerate(train_loader):
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
solutions = es.ask()
reward = np.zeros(es.popsize)
for i in range(es.popsize):
update_model(solutions[i], model, model_shapes)
output = model(data)
loss = F.nll_loss(output, target)
reward[i] = - loss.data[0]
best_raw_reward = reward.max()
reward = compute_centered_ranks(reward)
l2_decay = compute_weight_decay(weight_decay_coef, solutions)
reward += l2_decay
es.tell(reward)
result = es.result()
if (batch_idx % 50 == 0):
print(epoch, batch_idx, best_raw_reward, result[0].mean(), result[3])
curr_solution = es.current_param()
update_model(curr_solution, model, model_shapes)
valid_acc = evaluate(model, valid_loader, print_mode=False)
training_log.append([epoch, valid_acc])
print('valid_acc', valid_acc * 100.)
if valid_acc >= best_valid_acc:
best_valid_acc = valid_acc
best_model = copy.deepcopy(model)
print('best valid_acc', best_valid_acc * 100.)
#'''
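In [ ]:
# Added sketch: plot the per-epoch validation accuracy collected in training_log above.
import matplotlib.pyplot as plt
_log = np.array(training_log)
plt.plot(_log[:, 0], _log[:, 1] * 100.)
plt.xlabel('epoch')
plt.ylabel('validation accuracy (%)')
plt.show()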
In [ ]:
evaluate(best_model, valid_loader, print_mode=True)
In [ ]:
evaluate(best_model, test_loader, print_mode=True)
In [ ]:
evaluate(best_model, train_loader, print_mode=True)
In [ ]:
update_model(es.best_param(), model, model_shapes)
In [ ]:
evaluate(model, valid_loader, print_mode=True)
In [ ]:
evaluate(model, test_loader, print_mode=True)
In [ ]:
evaluate(model, train_loader, print_mode=True)
In [ ]:
update_model(es.current_param(), model, model_shapes)
In [ ]:
evaluate(model, valid_loader, print_mode=True)
In [ ]:
evaluate(model, test_loader, print_mode=True)
In [ ]:
evaluate(model, train_loader, print_mode=True)
In [ ]:
eval_acc = evaluate(best_model, test_loader)
print('final test acc', eval_acc * 100.)
In [ ]:
param_count = 0
for param in model.parameters():
print(param.data.shape)
param_count += np.prod(param.data.shape)
print(param_count)
In [ ]:
orig_params = []
for param in orig_model.parameters():
orig_params.append(param.data.cpu().numpy().flatten())
In [ ]:
orig_params_flat = np.concatenate(orig_params)
In [ ]:
import matplotlib.pyplot as plt
In [ ]:
_ = plt.hist(orig_params_flat, bins=200)
plt.show()
In [ ]:
final_params = []
for param in best_model.parameters():
final_params.append(param.data.cpu().numpy().flatten())
final_params_flat = np.concatenate(final_params)
In [ ]:
_ = plt.hist(final_params_flat, bins=200)
plt.show()