In [ ]:
# Goal: learn a feedforward network's parameters such that the network embeds
# temporally nearby frames (lag tau_1) closer together than temporally distant
# frames (lag tau_2), i.e. minimize d(f(x_t), f(x_{t+tau_1})) - d(f(x_t), f(x_{t+tau_2}))
In [918]:
# Adapted from: https://github.com/HIPS/autograd/blob/master/examples/neural_net.py
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
from autograd.util import quick_grad_check

class WeightsParser(object):
    """A helper class to index into a parameter vector."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0

    def add_weights(self, name, shape):
        start = self.N
        self.N += np.prod(shape)
        self.idxs_and_shapes[name] = (slice(start, self.N), shape)

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

def linear_loss(d_close, d_far):
    '''We want a higher penalty when d_close > d_far.'''
    return d_close - d_far

def linear_loss_normed(d_close, d_far):
    '''Same as linear_loss, but scale-invariant: normalized by d_close + d_far.'''
    return (d_close - d_far) / (d_close + d_far)

def exp_loss(d_close, d_far, scale=5.0):
    '''Exponentially penalize d_close exceeding d_far.'''
    sign = d_close >= d_far
    return (2 * sign - 1) * np.exp(scale * np.abs(d_close - d_far))

def make_nn_funs(layer_sizes, activation=np.tanh, loss_fxn=linear_loss, ortho_penalty_param=0.01):
    parser = WeightsParser()
    for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        parser.add_weights(('weights', i), shape)
        parser.add_weights(('biases', i), (1, shape[1]))

    def predictions(W_vect, X):
        """Map inputs through the network and return the learned embedding."""
        cur_units = X
        for i in range(len(layer_sizes) - 1):
            cur_W = parser.get(W_vect, ('weights', i))
            cur_B = parser.get(W_vect, ('biases', i))
            cur_units = np.dot(cur_units, cur_W) + cur_B
            cur_units = activation(cur_units)
        return cur_units

    def distance(x, y):
        return np.sqrt(np.dot(x - y, x - y))

    def orthonormal_columns_penalty(A):
        '''Penalize deviation of A's columns from orthonormality.'''
        return np.sum(np.abs(np.dot(A.T, A) - np.eye(A.shape[1]))**2)

    def loss(W_vect, X_triplet, penalty_param=ortho_penalty_param):
        '''Triplet loss: compare the embedded distance to the temporally-close
        point against the distance to the temporally-far point.'''
        y, y_close, y_far = predictions(W_vect, X_triplet)
        d_close = distance(y, y_close)
        d_far = distance(y, y_far)
        ortho_penalty = 0
        if penalty_param > 0:
            for i in range(len(layer_sizes) - 1):
                ortho_penalty += penalty_param * orthonormal_columns_penalty(parser.get(W_vect, ('weights', i)))
        return loss_fxn(d_close, d_far) + ortho_penalty
        #log_prior = -L2_reg * np.dot(W_vect, W_vect)
        #log_lik = d_close - d_far
        #return - log_prior - log_lik

    return parser.N, predictions, loss
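In [ ]:
# Illustrative sketch (not part of the original run): how WeightsParser packs all
# layer parameters into one flat vector, and how the triplet losses compare the two
# distances. The names parser_demo / W0 / b0 below are made up for this example.
parser_demo = WeightsParser()
parser_demo.add_weights('W0', (3, 2))
parser_demo.add_weights('b0', (1, 2))
flat = npr.randn(parser_demo.N)              # one flat vector holding every parameter
W0 = parser_demo.get(flat, 'W0')             # reshaped view, shape (3, 2)
b0 = parser_demo.get(flat, 'b0')             # reshaped view, shape (1, 2)
print(parser_demo.N, W0.shape, b0.shape)
# the losses are negative when the temporally-close point is embedded closer:
print(linear_loss(1.0, 2.0), linear_loss_normed(1.0, 2.0), exp_loss(1.0, 2.0))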
In [896]:
layer_sizes = [10,2]
n_weights,pred_fun,loss_fun = make_nn_funs(layer_sizes,activation = lambda i:i)
In [897]:
pred_fun(npr.randn(n_weights),(npr.randn(10),npr.randn(10),npr.randn(10)))
Out[897]:
In [898]:
def tripletify_trajectory(X, tau_1=5, tau_2=20):
    '''Turn a trajectory into (frame t, frame t+tau_1, frame t+tau_2) triplets.'''
    X_triplets = []
    for i in range(len(X) - tau_2):
        X_triplets.append((X[i], X[i + tau_1], X[i + tau_2]))
    return X_triplets
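In [ ]:
# Toy illustration (synthetic data, not the dihedral features): each triplet is
# (frame t, frame t+tau_1, frame t+tau_2), so the middle element is the
# temporally-close partner and the last the temporally-distant one.
toy_traj = np.arange(10).reshape(-1, 1)      # 10 "frames" with a single feature
toy_triplets = tripletify_trajectory(toy_traj, tau_1=2, tau_2=5)
print(len(toy_triplets))                     # 10 - tau_2 = 5 triplets
print(toy_triplets[0])                       # (frame 0, frame 2, frame 5)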
In [170]:
from msmbuilder.example_datasets import FsPeptide
fs = FsPeptide().get()
from msmbuilder.featurizer import DihedralFeaturizer
dhft = DihedralFeaturizer().fit_transform(fs.trajectories)
In [899]:
tau_1 = 1
tau_2 = 100
triplets = [tripletify_trajectory(dhft_,tau_1,tau_2) for dhft_ in dhft]
In [900]:
X = np.vstack(triplets)
In [1011]:
X.shape
Out[1011]:
In [902]:
n_dihedrals = X.shape[-1]
In [1006]:
#layer_sizes = [n_dihedrals,int(n_dihedrals/2),int(n_dihedrals/4),2]
#layer_sizes = [n_dihedrals,n_dihedrals*2,n_dihedrals,n_dihedrals/2,2]
layer_sizes = [n_dihedrals, 20, 5]

def relu(X):
    # avoid in-place assignment (X[X<0]=0): autograd cannot differentiate through
    # array mutation, and it would also modify the caller's array
    return X * (X > 0)

def softplus(x):
    return np.log(1 + np.exp(x))

n_weights, pred_fun, loss_fun = make_nn_funs(layer_sizes, activation=relu,
                                             loss_fxn=linear_loss_normed,
                                             ortho_penalty_param=0)
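In [ ]:
# Quick check (illustrative only): relu and softplus behave as expected on a small
# array, and the rewritten relu no longer mutates its argument -- important because
# autograd cannot differentiate through in-place assignment.
z = np.array([-2.0, 0.0, 3.0])
print(relu(z), softplus(z))
print(z)                                     # unchanged by relu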
In [1013]:
X[X<0].shape
Out[1013]:
In [1007]:
layer_sizes,n_weights
Out[1007]:
In [1008]:
grad_loss = grad(loss_fun)
In [1009]:
grad_loss(npr.randn(n_weights),X[0])
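In [ ]:
# Optional sanity check (sketch): compare the autograd gradient against a central
# finite difference at a few randomly chosen coordinates.
w0 = npr.randn(n_weights)
g0 = grad_loss(w0, X[0])
eps = 1e-5
for k in npr.randint(0, n_weights, 3):
    e = np.zeros(n_weights)
    e[k] = eps
    fd = (loss_fun(w0 + e, X[0]) - loss_fun(w0 - e, X[0])) / (2 * eps)
    print(k, g0[k], fd)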
In [987]:
%timeit grad_loss(npr.randn(n_weights),X[0])
In [988]:
n_iter=1000
npr.seed(0)
weights = np.zeros((n_iter,n_weights))
weights[0] = npr.randn(n_weights)
In [989]:
step_size = 1.0
for i in range(1, n_iter):
    # SGD step on a single randomly chosen triplet; nan_to_num guards against
    # occasional NaN gradients (e.g. from the normalized loss when d_close + d_far is ~0)
    weights[i] = weights[i-1] - step_size * np.nan_to_num(grad_loss(weights[i-1], X[npr.randint(len(X))]))
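In [ ]:
# Variant (sketch, not used for the results below): the same SGD loop with a
# 1/sqrt(t) step-size decay, a common way to damp late-iteration noise in the
# optimization trace.
weights_decay = np.zeros((n_iter, n_weights))
weights_decay[0] = weights[0]
for i in range(1, n_iter):
    g = np.nan_to_num(grad_loss(weights_decay[i-1], X[npr.randint(len(X))]))
    weights_decay[i] = weights_decay[i-1] - (step_size / np.sqrt(i)) * g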
In [990]:
weights
Out[990]:
In [991]:
grad_loss(weights[-1],X[0])
Out[991]:
In [992]:
weights[-1]
Out[992]:
In [993]:
import matplotlib.pyplot as plt
%matplotlib inline
In [994]:
plt.plot(weights[:,:10]);
plt.xlabel('Iteration')
plt.ylabel('Parameter value')
plt.title('Metric Learning by SGD -- Optimization trace')
Out[994]:
In [995]:
plt.plot(weights[:150,:10]);
plt.xlabel('Iteration')
plt.ylabel('Parameter value')
plt.title('Metric Learning by SGD -- Optimization trace')
Out[995]:
In [952]:
diff = weights[1:] - weights[:-1]
diff.shape,np.abs(diff).sum(0).shape
Out[952]:
In [891]:
diff
Out[891]:
In [724]:
plt.hist(np.abs(diff).sum(0)/n_weights, bins=50, density=True);
plt.xlabel('Stochastic update magnitude')
plt.ylabel('Density')
plt.title('Distribution of update magnitudes')
Out[724]:
In [703]:
import seaborn as sns
sns.kdeplot(np.abs(diff).sum(0)/n_weights)
plt.xlabel('Stochastic update magnitude')
plt.ylabel('Density')
plt.title('Distribution of update magnitudes')
Out[703]:
In [742]:
def batch_loss(w, X, loss_fun=loss_fun):
    return sum([loss_fun(w, x) for x in X])
In [663]:
from time import time
t = time()
batch_loss(weights[0],X)
t1 = time()
print(t1-t)
In [740]:
npr.seed(0)
sample_size=100000
mask = npr.randint(0,len(X),sample_size)
X_sample = X[mask]
In [764]:
batch_loss(weights[0],X_sample),batch_loss(weights[-1],X_sample)
Out[764]:
In [667]:
npr.seed(0)
sample_size=10000
mask = npr.randint(0,len(X),sample_size)
X_sample = X[mask]
In [668]:
from time import time
t = time()
batch_loss(weights[0],X_sample)
t1 = time()
print(t1-t)
In [669]:
len(weights[:1000][::10])
Out[669]:
In [705]:
losses = [batch_loss(w, X_sample) for w in weights[:1000][::10]]
plt.plot(losses)
plt.xlabel('Checkpoint (every 10th iterate)')
plt.ylabel('Batch loss on subsample')
In [616]:
print(fs.DESCR)
In [996]:
# before training
X_pred = pred_fun(weights[0],X[:,0,:])
plt.scatter(X_pred[:,0],X_pred[:,1],linewidths=0,s=2,c=np.arange(len(X_pred)),cmap='rainbow')
plt.figure()
# after training: tau_1 = 1, tau_2 = 100,
# layer_sizes = [n_dihedrals,2]
# activation=lambda i:i,loss_fxn=linear_loss_normed,ortho_penalty_param=0)
X_pred = pred_fun(weights[-1],X[:,0,:])
plt.scatter(X_pred[:,0],X_pred[:,1],linewidths=0,s=2,c=np.arange(len(X_pred)),cmap='rainbow')
from sklearn.decomposition import PCA
plt.figure()
X_pred = PCA(2).fit_transform(pred_fun(weights[-1],X[:,0,:]))
plt.scatter(X_pred[:,0],X_pred[:,1],linewidths=0,s=2,c=np.arange(len(X_pred)),cmap='rainbow')
In [636]:
# before training
X_pred = pred_fun(weights[0] / np.sum(weights[0]),X[:,0,:])
plt.scatter(X_pred[:,0],X_pred[:,1],linewidths=0,s=2,c=np.arange(len(X_pred)),cmap='rainbow')
plt.figure()
# after training
X_pred = pred_fun(weights[-1] / np.sum(weights[-1]),X[:,0,:])
plt.scatter(X_pred[:,0],X_pred[:,1],linewidths=0,s=2,c=np.arange(len(X_pred)),cmap='rainbow')
Out[636]:
In [ ]:
In [637]:
from msmbuilder.decomposition import tICA
tica = tICA(n_components=2, lag_time=10)
In [638]:
X_tica = np.vstack(tica.fit_transform(dhft))
In [639]:
plt.scatter(X_tica[:,0],X_tica[:,1],linewidths=0,s=2,c=np.arange(len(X_tica)),cmap='rainbow')
Out[639]:
In [954]:
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
from sklearn.pipeline import Pipeline

results = dict()
embeddings = [('tICA', X_tica), ('Triplet', X_pred)]
for (name, dataset) in embeddings:
    pipeline = Pipeline([
        ('cluster', MiniBatchKMeans(n_clusters=200)),
        ('msm', MarkovStateModel(lag_time=10))
    ])
    # fit on the first 100,000 frames, score on the held-out remainder
    pipeline.fit([dataset[:100000]])
    results[name] = pipeline
    print(name, pipeline.score([dataset[100000:]]))
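In [ ]:
# Follow-up sketch (assumption: msmbuilder's MarkovStateModel exposes timescales_):
# inspect the slowest implied timescales of each fitted MSM for a rough comparison
# of the two embeddings.
for name, fitted in results.items():
    msm = fitted.named_steps['msm']
    print(name, msm.timescales_[:5])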
In [ ]: