In [1]:
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer

fs = FsPeptide().get().trajectories
n_atoms = fs[0].n_atoms
fs_dih_feat = DihedralFeaturizer().transform(fs)

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

X_train = np.vstack([traj[:8000] for traj in fs_dih_feat])


loading trajectory_1.xtc...
loading trajectory_10.xtc...
loading trajectory_11.xtc...
loading trajectory_12.xtc...
loading trajectory_13.xtc...
loading trajectory_14.xtc...
loading trajectory_15.xtc...
loading trajectory_16.xtc...
loading trajectory_17.xtc...
loading trajectory_18.xtc...
loading trajectory_19.xtc...
loading trajectory_2.xtc...
loading trajectory_20.xtc...
loading trajectory_21.xtc...
loading trajectory_22.xtc...
loading trajectory_23.xtc...
loading trajectory_24.xtc...
loading trajectory_25.xtc...
loading trajectory_26.xtc...
loading trajectory_27.xtc...
loading trajectory_28.xtc...
loading trajectory_3.xtc...
loading trajectory_4.xtc...
loading trajectory_5.xtc...
loading trajectory_6.xtc...
loading trajectory_7.xtc...
loading trajectory_8.xtc...
loading trajectory_9.xtc...

In [20]:
from sklearn.decomposition import PCA
# Fit a PCA with no limit on the number of components, so the complete
# explained-variance spectrum can be inspected in the following cells.
# `fit` returns the estimator itself, so the cell still displays its repr.
pca = PCA().fit(X_train)
pca


Out[20]:
PCA(copy=True, n_components=None, whiten=False)

In [21]:
# Figure 1: fraction of the total variance explained by each component.
fig_ratio, ax_ratio = plt.subplots()
ax_ratio.set(xlabel='principal component index',
             ylabel='explained variance ratio',
             title='PCA explained variance per component')
ax_ratio.plot(pca.explained_variance_ratio_)

# Figure 2: running total of the explained variance. The plot call stays
# last so the cell output is still the list of Line2D objects.
fig_cumulative, ax_cumulative = plt.subplots()
ax_cumulative.set(xlabel='number of principal components',
                  ylabel='cumulative explained variance ratio',
                  title='PCA cumulative explained variance')
ax_cumulative.plot(np.cumsum(pca.explained_variance_ratio_))


Out[21]:
[<matplotlib.lines.Line2D at 0x12cc59750>]

In [22]:
# Cumulative explained-variance ratio over the 20 leading components.
pca.explained_variance_ratio_[:20].cumsum()


Out[22]:
array([ 0.12631749,  0.20338393,  0.2642495 ,  0.31907303,  0.36841625,
        0.40719971,  0.44414426,  0.47682893,  0.50753376,  0.53572956,
        0.56200125,  0.58504052,  0.60675376,  0.62715691,  0.64506708,
        0.6616204 ,  0.67623384,  0.68999798,  0.70271882,  0.71422662])

In [25]:
# Refit, this time keeping only 20 components. This rebinds `pca`, so the
# full-spectrum model from the earlier cell is no longer available.
pca = PCA(n_components=20).fit(X_train)
pca


Out[25]:
PCA(copy=True, n_components=20, whiten=False)

In [26]:
# Project the training frames onto the principal components and keep the
# first 20 coordinates of every frame.
projected = pca.transform(X_train)
y = projected[:, :20]

In [27]:
# Map the reduced coordinates `y` back into the original feature space.
# NOTE: `y` is reassigned by the later sequence-building cell, so this cell
# must run before that one to get the PCA reconstruction.
X_reconstructed = pca.inverse_transform(y)

In [28]:
# Reconstruction error: squared residuals summed over the features of each
# frame, then averaged over all frames.
np.square(X_reconstructed - X_train).sum(axis=1).mean()


Out[28]:
5.2748456

In [33]:
# Baseline for the reconstruction error: squared deviation from the
# per-feature mean, summed over features and averaged over frames.
np.square(X_train - X_train.mean(axis=0)).sum(axis=1).mean()


Out[33]:
18.458143

In [30]:
# Shape of the per-feature mean vector. The cell source had been left as the
# stray fragment `5.`, which does not produce the recorded `(84,)` output;
# this expression is consistent with that output.
# NOTE(review): the original expression is not recoverable from the notebook;
# confirm this is the intended check.
X_train.mean(0).shape


Out[30]:
(84,)

In [32]:
# Sanity check: subtracting the per-feature mean broadcasts across frames,
# so the centered matrix has the same shape as X_train.
np.subtract(X_train, X_train.mean(axis=0)).shape


Out[32]:
(224000, 84)

In [36]:
# Plot the mean of every feature over the training frames. Labels are set
# before plotting so the cell output is still the list of Line2D objects.
fig_mean, ax_mean = plt.subplots()
ax_mean.set(xlabel='feature index',
            ylabel='mean over training frames',
            title='Per-feature mean of X_train')
ax_mean.plot(X_train.mean(axis=0))


Out[36]:
[<matplotlib.lines.Line2D at 0x12ed3a890>]

In [65]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
import numpy as np

# Cut the trajectories into semi-redundant windows of `maxlen` consecutive
# frames; consecutive windows start `step` frames apart. Each window is
# paired with the frame that immediately follows it (the prediction target).
maxlen = 10
step = 3
sequences = []
next_frames = []
# Only the first two trajectories are used here.
for traj in fs_dih_feat[:2]:
    # Stopping at len(traj) - maxlen guarantees traj[start + maxlen] exists.
    for start in range(0, len(traj) - maxlen, step):
        stop = start + maxlen
        sequences.append(traj[start:stop])
        next_frames.append(traj[stop])
print('nb sequences:', len(sequences))
ndim = fs_dih_feat[0].shape[1]

print('Vectorization...')
# Convert the lists into dense float64 arrays in one step. The explicit
# reshape keeps the (n, maxlen, ndim) / (n, ndim) shapes even when no windows
# were produced.
# NOTE: this rebinds `y`, which earlier cells use for the PCA projection.
X = np.asarray(sequences, dtype=np.float64).reshape(-1, maxlen, ndim)
y = np.asarray(next_frames, dtype=np.float64).reshape(-1, ndim)

X.shape, y.shape


nb sequences: 6660
Vectorization...
Out[65]:
((6660, 10, 84), (6660, 84))

In [68]:
# Display the number of features per frame (second axis of a featurized
# trajectory), which sets the model's input and output width below.
ndim


Out[68]:
84

In [69]:
from keras.optimizers import SGD

print('Build model...')

# Width of both recurrent layers: twice the per-frame feature count.
hidden_dim = ndim * 2

# Two stacked LSTMs followed by a dense read-out back to `ndim` values.
# The layers take (input_dim, output_dim) positionally -- presumably the
# Keras 0.1.x signature; confirm against the installed version.
# Dropout(0.2) after each LSTM was tried and is currently disabled.
model = Sequential()
model.add(LSTM(ndim, hidden_dim, return_sequences=True))    # emits every timestep
model.add(LSTM(hidden_dim, hidden_dim, return_sequences=False))  # emits last timestep only
model.add(Dense(hidden_dim, ndim))

# Nesterov-momentum SGD minimising the mean squared error of the prediction.
# NOTE(review): the recorded run of this cell was interrupted by hand inside
# model.compile (during Theano graph optimization), so compilation may be slow.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd)


INFO (theano.gof.compilelock): Refreshing lock /Users/joshuafass/.theano/compiledir_Darwin-14.4.0-x86_64-i386-64bit-i386-2.7.10-64/lock_dir/lock
INFO:theano.gof.compilelock:Refreshing lock /Users/joshuafass/.theano/compiledir_Darwin-14.4.0-x86_64-i386-64bit-i386-2.7.10-64/lock_dir/lock
Build model...
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-69-0d922b3be50f> in <module>()
     10 
     11 sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
---> 12 model.compile(loss='mean_squared_error', optimizer=sgd)

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/Keras-0.1.2-py2.7.egg/keras/models.pyc in compile(self, optimizer, loss, class_mode, theano_mode)
    404                                       allow_input_downcast=True, mode=theano_mode)
    405         self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates,
--> 406                                                allow_input_downcast=True, mode=theano_mode)
    407         self._predict = theano.function(predict_ins, self.y_test,
    408                                         allow_input_downcast=True, mode=theano_mode)

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/compile/function.pyc in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    264                 allow_input_downcast=allow_input_downcast,
    265                 on_unused_input=on_unused_input,
--> 266                 profile=profile)
    267     # We need to add the flag check_aliased inputs if we have any mutable or
    268     # borrowed used defined inputs

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/compile/pfunc.pyc in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    509     return orig_function(inputs, cloned_outputs, mode,
    510             accept_inplace=accept_inplace, name=name, profile=profile,
--> 511             on_unused_input=on_unused_input)
    512 
    513 

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/compile/function_module.pyc in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input)
   1463                    accept_inplace=accept_inplace,
   1464                    profile=profile,
-> 1465                    on_unused_input=on_unused_input).create(
   1466                        defaults)
   1467 

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/compile/function_module.pyc in __init__(self, inputs, outputs, mode, accept_inplace, function_builder, profile, on_unused_input, fgraph)
   1158                         optimizer, inputs, outputs)
   1159                 else:
-> 1160                     optimizer_profile = optimizer(fgraph)
   1161 
   1162                 end_optimizer = time.time()

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in __call__(self, fgraph)
     88         Same as self.optimize(fgraph)
     89         """
---> 90         return self.optimize(fgraph)
     91 
     92     def add_requirements(self, fgraph):

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in optimize(self, fgraph, *args, **kwargs)
     79             orig = theano.tensor.basic.constant.enable
     80             theano.tensor.basic.constant.enable = False
---> 81             ret = self.apply(fgraph, *args, **kwargs)
     82         finally:
     83             theano.tensor.basic.constant.enable = orig

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in apply(self, fgraph)
    193             try:
    194                 t0 = time.time()
--> 195                 sub_prof = optimizer.optimize(fgraph)
    196                 l.append(float(time.time() - t0))
    197                 sub_profs.append(sub_prof)

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in optimize(self, fgraph, *args, **kwargs)
     79             orig = theano.tensor.basic.constant.enable
     80             theano.tensor.basic.constant.enable = False
---> 81             ret = self.apply(fgraph, *args, **kwargs)
     82         finally:
     83             theano.tensor.basic.constant.enable = orig

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in apply(self, fgraph, start_from)
   1835                         nb = change_tracker.nb_imported
   1836                         t_opt = time.time()
-> 1837                         lopt_change = self.process_node(fgraph, node, lopt)
   1838                         time_opts[lopt] += time.time() - t_opt
   1839                         if lopt_change:

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/opt.pyc in process_node(self, fgraph, node, lopt)
   1525             return False
   1526         try:
-> 1527             fgraph.replace_all_validate(repl_pairs, reason=lopt)
   1528             return True
   1529         except Exception, e:

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/toolbox.pyc in replace_all_validate(self, fgraph, replacements, reason, verbose)
    257         for r, new_r in replacements:
    258             try:
--> 259                 fgraph.replace(r, new_r, reason=reason, verbose=False)
    260             except Exception, e:
    261                 if ('The type of the replacement must be the same' not in

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/fg.pyc in replace(self, r, new_r, reason, verbose)
    500         for node, i in list(r.clients):  # copy the client list for iteration
    501             assert (node == 'output' and self.outputs[i] is r) or (node.inputs[i] is r)
--> 502             self.change_input(node, i, new_r, reason=reason)
    503 
    504         # sometimes the following is triggered.  If you understand why, please explain to James.

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/fg.pyc in change_input(self, node, i, new_r, reason)
    440             return
    441 
--> 442         self.__import_r__([new_r], reason=reason)
    443         self.__add_clients__(new_r, [(node, i)])
    444         prune = self.__remove_clients__(r, [(node, i)], False)

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/fg.pyc in __import_r__(self, variables, reason)
    255         for apply_node in [r.owner for r in variables if r.owner is not None]:
    256             if apply_node not in self.apply_nodes:
--> 257                 self.__import__(apply_node, reason=reason)
    258         for r in variables:
    259             if r.owner is None and not isinstance(r, graph.Constant) and r not in self.inputs:

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/fg.pyc in __import__(self, apply_node, check, reason)
    375                 self.__add_clients__(input, [(node, i)])
    376             assert node.fgraph is self
--> 377             self.execute_callbacks('on_import', node, reason)
    378 
    379     ### prune ###

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/gof/fg.pyc in execute_callbacks(self, name, *args, **kwargs)
    575                 continue
    576             tf0 = time.time()
--> 577             fn(self, *args, **kwargs)
    578             self.execute_callbacks_times[feature] += time.time() - tf0
    579         self.execute_callbacks_time += time.time() - t0

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/tensor/opt.pyc in on_import(self, fgraph, node, reason)
   1045         try:
   1046             o_shapes = shape_infer(node,
-> 1047                                    [self.shape_of[r] for r in node.inputs])
   1048         except ShapeError:
   1049             o_shapes = self.default_infer_shape(node, [self.shape_of[r] for

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/tensor/subtensor.pyc in infer_shape(self, node, shapes)
    532                     outshp.append(xl)
    533                 else:
--> 534                     cnf = get_canonical_form_slice(idx, xl)[0]
    535                     if cnf.step == 1:
    536                         length = cnf.stop - cnf.start

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/tensor/subtensor.pyc in get_canonical_form_slice(theslice, length)
    124 
    125         start, is_start_constant = analyze(theslice.start)
--> 126         stop, is_stop_constant = analyze(theslice.stop)
    127         step, is_step_constant = analyze(theslice.step)
    128         length, is_length_constant = analyze(length)

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/tensor/subtensor.pyc in analyze(x)
    116         def analyze(x):
    117             try:
--> 118                 x_constant = get_scalar_constant_value(x)
    119                 is_constant = True
    120             except theano.tensor.NotScalarConstantError:

/Users/joshuafass/anaconda/envs/py27/lib/python2.7/site-packages/theano/tensor/basic.pyc in get_scalar_constant_value(orig_v, elemwise, only_process_constants)
    588                 v = v.owner.inputs[0]
    589                 continue
--> 590             elif isinstance(v.owner.op, theano.compile.ops.Shape_i):
    591                 if isinstance(v.owner.inputs[0], Constant):
    592                     return numpy.asarray(v.owner.inputs[0].data.shape[v.owner.op.i])

KeyboardInterrupt: 

In [ ]: