In [2]:
# mostly from: https://github.com/HIPS/autograd/blob/master/examples/lstm.py
In [1]:
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import value_and_grad
from autograd.util import quick_grad_check
from scipy.optimize import minimize
class WeightsParser(object):
    """A helper class to index into a parameter vector."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.num_weights = 0

    def add_shape(self, name, shape):
        start = self.num_weights
        self.num_weights += np.prod(shape)
        self.idxs_and_shapes[name] = (slice(start, self.num_weights), shape)

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)
def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1.0)   # Output ranges from 0 to 1.

def activations(weights, *args):
    # Concatenate the inputs with a column of ones (the bias term), then apply the weights.
    cat_state = np.concatenate(args + (np.ones((args[0].shape[0], 1)),), axis=1)
    return np.dot(cat_state, weights)

def logsumexp(X, axis=1):
    # Numerically stable log(sum(exp(X))): subtract the row-wise max before exponentiating.
    max_X = np.max(X, axis=axis, keepdims=True)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
def build_lstm(input_size, state_size, output_size):
    """Builds functions to compute the output of an LSTM."""
    parser = WeightsParser()
    parser.add_shape('init_cells',   (1, state_size))
    parser.add_shape('init_hiddens', (1, state_size))
    parser.add_shape('change',  (input_size + state_size + 1, state_size))
    parser.add_shape('forget',  (input_size + 2 * state_size + 1, state_size))
    parser.add_shape('ingate',  (input_size + 2 * state_size + 1, state_size))
    parser.add_shape('outgate', (input_size + 2 * state_size + 1, state_size))
    parser.add_shape('predict', (state_size + 1, output_size))

    def update_lstm(input, hiddens, cells, forget_weights, change_weights,
                    ingate_weights, outgate_weights):
        """One iteration of an LSTM layer."""
        change = np.tanh(activations(change_weights, input, hiddens))
        forget = sigmoid(activations(forget_weights, input, cells, hiddens))
        ingate = sigmoid(activations(ingate_weights, input, cells, hiddens))
        cells = cells * forget + ingate * change
        outgate = sigmoid(activations(outgate_weights, input, cells, hiddens))
        hiddens = outgate * np.tanh(cells)
        return hiddens, cells
    def outputs(weights, inputs):
        """Walks through the sequence one time step at a time, updating the state.

        `inputs` is expected with shape (time, batch, input_size)."""
        forget_weights = parser.get(weights, 'forget')
        change_weights = parser.get(weights, 'change')
        ingate_weights = parser.get(weights, 'ingate')
        outgate_weights = parser.get(weights, 'outgate')
        predict_weights = parser.get(weights, 'predict')
        num_sequences = inputs.shape[1]
        hiddens = np.repeat(parser.get(weights, 'init_hiddens'), num_sequences, axis=0)
        cells = np.repeat(parser.get(weights, 'init_cells'), num_sequences, axis=0)
        output = []
        for input in inputs:   # Iterate over time steps.
            hiddens, cells = update_lstm(input, hiddens, cells, forget_weights,
                                         change_weights, ingate_weights, outgate_weights)
            cur_output = activations(predict_weights, hiddens)
            output.append(cur_output - logsumexp(cur_output))
        return output   # Normalized log-probabilities at every time step.

    def loss(weights, inputs, targets):
        logprobs = outputs(weights, inputs)
        loss_sum = 0.0
        for t in range(len(targets)):   # For every time step.
            loss_sum -= np.sum(logprobs[t] * targets[t])
        return loss_sum / targets.shape[0] / targets.shape[1]   # Average over time and batch.

    def frac_err(weights, inputs, targets):
        return np.mean(np.argmax(targets, axis=2) !=
                       np.argmax(np.array(outputs(weights, inputs)), axis=2))

    return outputs, loss, frac_err, parser.num_weights
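A quick check of the plumbing (a hedged sketch with toy shapes; `toy` and `flat` are illustrative names, not part of the original): `WeightsParser` hands out named views into one flat weight vector, which is what lets `minimize` later treat all the LSTM parameters as a single array.
In [ ]:
# Hedged sketch: pack two named parameter blocks into one flat vector and read them back.
toy = WeightsParser()
toy.add_shape('W', (2, 3))
toy.add_shape('b', (1, 3))
flat = npr.randn(toy.num_weights)                     # 9 weights in one vector
toy.get(flat, 'W').shape, toy.get(flat, 'b').shape    # -> ((2, 3), (1, 3))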
In [2]:
def propagator(x_prev):
    # One step of a unit-variance Gaussian random walk.
    return x_prev + npr.randn(len(x_prev))
In [3]:
sequence = np.ones((10000, 10))
for i in range(sequence.shape[0] - 1):
    sequence[i+1] = propagator(sequence[i])
In [44]:
test_sequence = np.ones((10000, 10))
for i in range(test_sequence.shape[0] - 1):
    test_sequence[i+1] = propagator(test_sequence[i])
# Carve the walk into 100 non-overlapping windows of 50 steps, one starting every 100 steps.
test_sequences = np.array([test_sequence[i:i+50] for i in np.arange(len(test_sequence))[::100]])
In [4]:
sequence
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
In [6]:
plt.plot(sequence)
Out[6]:
[figure: the 10 random-walk components of `sequence` plotted against time step]
In [20]:
pred_fun, loss_fun, frac_err, num_weights = build_lstm(sequence.shape[1], 2, sequence.shape[1])
In [21]:
sequence.shape, num_weights
Out[21]:
((10000, 10), 150)
In [26]:
# Same windowing as for the test walk: 100 windows of 50 steps each.
sequences = np.array([sequence[i:i+50] for i in np.arange(len(sequence))[::100]])
sequences.shape
Out[26]:
(100, 50, 10)
In [29]:
np.array(pred_fun(npr.randn(num_weights), sequences)).shape
Out[29]:
(100, 50, 10)
In [30]:
loss_and_grad = value_and_grad(loss_fun)
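`value_and_grad` turns `loss_fun` into a function returning the loss and its gradient in one call, exactly the pair that `scipy.optimize.minimize` consumes when `jac=True`. A hedged sanity check with random weights (`val` and `grad` are illustrative names):
In [ ]:
# One call gives (loss, gradient); the gradient has one entry per weight.
val, grad = loss_and_grad(npr.randn(num_weights), sequences, sequences)
val, grad.shape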
In [34]:
train_inputs = train_targets = sequences   # Each sequence serves as its own target.
In [35]:
def training_loss_and_grad(weights):
    # Close over the training data so `minimize` sees a function of the weights alone.
    return loss_and_grad(weights, train_inputs, train_targets)
In [38]:
init_weights = npr.randn(num_weights)
quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))
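`quick_grad_check` compares the autograd gradient against a finite-difference estimate along a random direction. The cell below sketches the same test by hand (the step size `eps` and the direction are illustrative choices, not part of the original):
In [ ]:
# Hedged sketch of the check: analytic directional derivative vs. central difference.
eps = 1e-4
direction = npr.randn(num_weights)
analytic = np.dot(training_loss_and_grad(init_weights)[1], direction)
numeric = (loss_fun(init_weights + eps * direction, train_inputs, train_targets) -
           loss_fun(init_weights - eps * direction, train_inputs, train_targets)) / (2 * eps)
analytic, numeric   # should agree to several decimal places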
In [40]:
plt.plot(training_loss_and_grad(init_weights)[1])
Out[40]:
[figure: gradient of the training loss at init_weights, one point per weight]
In [41]:
def callback(weights):
    print("Train loss:", loss_fun(weights, train_inputs, train_targets))

result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                  options={'maxiter': 10}, callback=callback)
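To see whether the ten CG iterations actually reduced the objective, compare the training loss at the initial and the optimized weights (a hedged check; the exact numbers depend on the random draws above):
In [ ]:
# Loss before vs. after optimization; the second number should be lower.
loss_fun(init_weights, train_inputs, train_targets), loss_fun(result.x, train_inputs, train_targets)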
In [43]:
result.x
In [49]:
loss_fun(result.x, test_sequences, test_sequences)
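Since `pred_fun` returns normalized log-probabilities, exponentiating them gives per-feature probabilities that can be plotted directly. A hedged visualization for the first batch column of the test windows (`test_logprobs` is an illustrative name):
In [ ]:
# Predicted per-feature probabilities across the 100 test windows (batch column 0).
test_logprobs = np.array(pred_fun(result.x, test_sequences))   # shape (100, 50, 10)
plt.plot(np.exp(test_logprobs[:, 0, :]))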
In [50]:
from msmbuilder.example_datasets import FsPeptide
fs = FsPeptide().get()
In [52]:
from msmbuilder.featurizer import DihedralFeaturizer
# Dihedral features: by default, sin/cos of each backbone phi/psi angle, one row per frame.
dhft = DihedralFeaturizer().fit_transform(fs.trajectories)
In [61]:
n_atoms = fs.trajectories[0].n_atoms
n_atoms
In [56]:
dhft[0].shape, len(dhft)
In [59]:
# Stack the first 20 feature trajectories into one 3-D array so `outputs` can index
# shape[1]; this assumes the trajectories all have the same number of frames.
train_inputs = train_targets = np.array(dhft[:20])
In [62]:
# The LSTM consumes dihedral features, so size its input/output by the feature
# dimension rather than by the number of atoms.
n_features = dhft[0].shape[1]
pred_fun, loss_fun, frac_err, num_weights = build_lstm(n_features, 10, n_features)
In [ ]:
pre