In [ ]:
import torch
import torch.nn as nn
import torch.nn.init as init
from model import RecurrentModel
reg = RecurrentModel(inputSize=4096, nHidden=[4096,1024, 64*32], \
noutputs=64*32, batchSize=1, ship2gpu=True, \
numLayers=1)
# for m in reg.modules():
# if(isinstance(m, nn.LSTM)):
# mkeys = m.state_dict().keys()
# mvals = m.state_dict().values()
# init.uniform(mvals[1], 0, 1)
# print(mvals[1])
"""
for m in reg.lstm3.modules():
for t in m.state_dict().values():
print init.uniform(t, 0, 1)
"""
for m in reg.modules():
if (isinstance, nn.LSTM):
keys = m.state_dict().keys()
print(keys)
print lstm1.weight_ih_l0
#weight_ih_l
<bound method OrderedDict.keys of OrderedDict([('lstm1.weight_ih_l0',
-8.3120e-03 1.2229e-02 3.6626e-03 ... 1.0745e-02 -8.7211e-03 -1.5729e-03
1.1770e-02 8.0955e-03 -8.6257e-03 ... 1.5129e-02 7.3832e-03 1.3534e-02
-1.0262e-02 7.2235e-03 -9.3444e-03 ... -1.0786e-02 -5.3581e-03 -1.0654e-02
... ⋱ ...
8.9649e-03 5.1380e-03 3.0401e-03 ... 7.2392e-03 -1.4466e-02 1.1002e-02
-1.3891e-02 1.4393e-02 -9.4514e-03 ... 1.5096e-02 8.0308e-03 6.3942e-04
-4.8369e-03 5.4814e-03 2.7434e-03 ... -1.0601e-03 1.3128e-02 -1.0100e-02
[torch.DoubleTensor of size 16384x4096]
), ('lstm1.weight_hh_l0',
9.0681e-03 8.3512e-03 1.3259e-02 ... 7.6777e-03 -1.2583e-02 6.7293e-03
1.0038e-02 -5.6466e-03 -3.2015e-03 ... 5.7920e-03 -9.6507e-03 -9.7868e-03
-1.2772e-02 -1.2567e-02 -3.3892e-03 ... -6.2676e-03 1.0279e-02 4.5665e-03
... ⋱ ...
-7.3647e-03 8.1241e-03 -8.5932e-03 ... -1.0691e-02 1.0714e-02 -6.9571e-03
-1.0909e-02 4.1533e-03 -7.1867e-03 ... -1.3236e-02 -6.4171e-03 1.5319e-03
7.5106e-03 6.4188e-03 3.8898e-03 ... 7.8048e-03 1.1788e-02 -2.3650e-04
[torch.DoubleTensor of size 16384x4096]
), ('lstm2.weight_ih_l0',
-2.5146e-03 2.1260e-02 -2.9394e-02 ... -3.1298e-03 2.5282e-02 -7.5236e-03
2.5535e-03 1.6201e-02 -9.2805e-03 ... 3.4387e-03 -2.7266e-02 4.8494e-03
1.6374e-03 2.8726e-02 -2.4216e-02 ... 1.6411e-02 1.8289e-02 1.8194e-02
... ⋱ ...
-1.9026e-03 6.2290e-03 -2.7997e-02 ... 1.4752e-02 -8.5351e-03 -1.3339e-02
2.0920e-02 5.8202e-03 6.1783e-03 ... 1.2403e-02 3.0006e-02 -7.8444e-03
-2.6174e-03 -5.8558e-04 2.1114e-02 ... -1.4445e-02 2.4024e-02 -2.5441e-02
[torch.DoubleTensor of size 4096x4096]
), ('lstm2.weight_hh_l0',
2.6991e-02 -2.5666e-02 -1.3833e-02 ... 1.0607e-02 2.4720e-02 -1.0858e-02
-6.1580e-03 2.6632e-02 -1.4935e-02 ... -6.6396e-03 1.1162e-04 -1.6596e-03
2.4643e-02 -1.5914e-02 3.0812e-02 ... -7.7535e-03 2.1454e-02 9.7609e-03
... ⋱ ...
-3.0703e-02 1.7707e-02 9.1310e-04 ... -1.0758e-02 3.0963e-02 -2.0343e-02
-2.0886e-02 -6.7411e-03 -2.7328e-02 ... -1.3424e-02 -9.6237e-05 -1.3324e-02
-2.6809e-02 -5.1521e-03 -2.2948e-02 ... -8.6834e-03 9.3513e-03 -6.9752e-03
[torch.DoubleTensor of size 4096x1024]
), ('lstm3.weight_ih_l0',
-4.0340e-03 4.1465e-04 -1.5683e-02 ... -1.1026e-02 -1.0947e-02 1.6227e-02
1.7910e-02 5.4558e-03 -1.9902e-03 ... -1.4846e-02 -1.8834e-02 -5.4759e-03
-1.7049e-02 -1.3265e-02 9.0059e-04 ... 1.3888e-02 -1.3095e-02 6.8316e-03
... ⋱ ...
1.6905e-02 1.0512e-02 -1.9430e-02 ... 1.2730e-02 -2.1872e-02 1.3590e-03
-1.8652e-02 5.4559e-03 -4.5743e-03 ... 1.1046e-02 -1.1336e-02 1.3728e-02
-9.3221e-03 -1.7549e-02 2.4111e-03 ... -2.1925e-02 3.6455e-03 4.9774e-03
[torch.DoubleTensor of size 8192x1024]
), ('lstm3.weight_hh_l0',
-2.1804e-02 -1.7305e-02 -1.2676e-02 ... 8.5824e-03 -3.9467e-03 4.3354e-03
-3.5676e-03 8.7299e-03 1.7373e-02 ... -1.6218e-02 6.6952e-03 -8.8986e-03
2.2167e-03 -3.0886e-03 -1.1764e-02 ... 3.9540e-03 1.2533e-02 4.8047e-03
... ⋱ ...
1.1770e-02 6.7463e-03 2.8238e-03 ... -1.1753e-02 1.2864e-02 1.4006e-02
-1.0686e-02 -1.8376e-02 7.6939e-03 ... 1.5753e-02 7.8034e-03 5.5767e-03
-1.1706e-02 -1.0713e-02 1.1022e-02 ... 6.5962e-03 1.8819e-02 -2.0634e-02
[torch.DoubleTensor of size 8192x2048]
)])>
<bound method OrderedDict.keys of OrderedDict()>
In [4]:
help(m)
Help on LSTM in module torch.nn.modules.rnn object:
class LSTM(RNNBase)
| Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
|
|
| For each element in the input sequence, each layer computes the following
| function:
|
| .. math::
|
| \begin{array}{ll}
| i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
| f_t = sigmoid(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
| g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hc} h_{(t-1)} + b_{hg}) \\
| o_t = sigmoid(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
| c_t = f_t * c_{(t-1)} + i_t * g_t \\
| h_t = o_t * \tanh(c_t)
| \end{array}
|
| where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`,
| :math:`x_t` is the hidden state of the previous layer at time `t` or :math:`input_t` for the first layer,
| and :math:`i_t`, :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget,
| cell, and out gates, respectively.
|
| Args:
| input_size: The number of expected features in the input x
| hidden_size: The number of features in the hidden state h
| num_layers: Number of recurrent layers.
| bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
| batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
| dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
| bidirectional: If True, becomes a bidirectional RNN. Default: False
|
| Inputs: input, (h_0, c_0)
| - **input** (seq_len, batch, input_size): tensor containing the features of the input sequence.
| The input can also be a packed variable length sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
| for details.
| - **h_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
| the initial hidden state for each element in the batch.
| - **c_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
| the initial cell state for each element in the batch.
|
|
| Outputs: output, (h_n, c_n)
| - **output** (seq_len, batch, hidden_size * num_directions): tensor containing
| the output features `(h_t)` from the last layer of the RNN, for each t. If a
| :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output will also be a
| packed sequence.
| - **h_n** (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for t=seq_len
| - **c_n** (num_layers * num_directions, batch, hidden_size): tensor containing the cell state for t=seq_len
|
| Attributes:
| weight_ih_l[k] : the learnable input-hidden weights of the k-th layer `(W_ii|W_if|W_ig|W_io)`, of shape
| `(input_size x 4*hidden_size)`
| weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer `(W_hi|W_hf|W_hg|W_ho)`, of shape
| `(hidden_size x 4*hidden_size)`
| bias_ih_l[k] : the learnable input-hidden bias of the k-th layer `(b_ii|b_if|b_ig|b_io)`, of shape
| `(4*hidden_size)`
| bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer `(W_hi|W_hf|W_hg|b_ho)`, of shape
| `(4*hidden_size)`
|
| Examples::
|
| >>> rnn = nn.LSTM(10, 20, 2)
| >>> input = Variable(torch.randn(5, 3, 10))
| >>> h0 = Variable(torch.randn(2, 3, 20))
| >>> c0 = Variable(torch.randn(2, 3, 20))
| >>> output, hn = rnn(input, (h0, c0))
|
| Method resolution order:
| LSTM
| RNNBase
| torch.nn.modules.module.Module
| __builtin__.object
|
| Methods defined here:
|
| __init__(self, *args, **kwargs)
|
| ----------------------------------------------------------------------
| Methods inherited from RNNBase:
|
| __repr__(self)
|
| __setstate__(self, d)
|
| forward(self, input, hx=None)
|
| reset_parameters(self)
|
| ----------------------------------------------------------------------
| Data descriptors inherited from RNNBase:
|
| all_weights
|
| ----------------------------------------------------------------------
| Methods inherited from torch.nn.modules.module.Module:
|
| __call__(self, *input, **kwargs)
|
| __delattr__(self, name)
|
| __dir__(self)
|
| __getattr__(self, name)
|
| __setattr__(self, name, value)
|
| add_module(self, name, module)
| Adds a child module to the current module.
|
| The module can be accessed as an attribute using the given name.
|
| apply(self, fn)
|
| children(self)
| Returns an iterator over immediate children modules.
|
| cpu(self, device_id=None)
| Moves all model parameters and buffers to the CPU.
|
| cuda(self, device_id=None)
| Moves all model parameters and buffers to the GPU.
|
| Arguments:
| device_id (int, optional): if specified, all parameters will be
| copied to that device
|
| double(self)
| Casts all parameters and buffers to double datatype.
|
| eval(self)
| Sets the module in evaluation mode.
|
| This has any effect only on modules such as Dropout or BatchNorm.
|
| float(self)
| Casts all parameters and buffers to float datatype.
|
| half(self)
| Casts all parameters and buffers to half datatype.
|
| load_state_dict(self, state_dict)
| Copies parameters and buffers from :attr:`state_dict` into
| this module and its descendants. The keys of :attr:`state_dict` must
| exactly match the keys returned by this module's :func:`state_dict()`
| function.
|
| Arguments:
| state_dict (dict): A dict containing parameters and
| persistent buffers.
|
| modules(self)
| Returns an iterator over all modules in the network.
|
| Note:
| Duplicate modules are returned only once. In the following
| example, ``l`` will be returned only once.
|
| >>> l = nn.Linear(2, 2)
| >>> net = nn.Sequential(l, l)
| >>> for idx, m in enumerate(net.modules()):
| >>> print(idx, '->', m)
| 0 -> Sequential (
| (0): Linear (2 -> 2)
| (1): Linear (2 -> 2)
| )
| 1 -> Linear (2 -> 2)
|
| named_children(self)
| Returns an iterator over immediate children modules, yielding both
| the name of the module as well as the module itself.
|
| Example:
| >>> for name, module in model.named_children():
| >>> if name in ['conv4', 'conv5']:
| >>> print(module)
|
| named_modules(self, memo=None, prefix='')
| Returns an iterator over all modules in the network, yielding
| both the name of the module as well as the module itself.
|
| Note:
| Duplicate modules are returned only once. In the following
| example, ``l`` will be returned only once.
|
| >>> l = nn.Linear(2, 2)
| >>> net = nn.Sequential(l, l)
| >>> for idx, m in enumerate(net.named_modules()):
| >>> print(idx, '->', m)
| 0 -> ('', Sequential (
| (0): Linear (2 -> 2)
| (1): Linear (2 -> 2)
| ))
| 1 -> ('0', Linear (2 -> 2))
|
| named_parameters(self, memo=None, prefix='')
| Returns an iterator over module parameters, yielding both the
| name of the parameter as well as the parameter itself
|
| Example:
| >>> for name, param in self.named_parameters():
| >>> if name in ['bias']:
| >>> print(param.size())
|
| parameters(self, memo=None)
| Returns an iterator over module parameters.
|
| This is typically passed to an optimizer.
|
| Example:
| >>> for param in model.parameters():
| >>> print(type(param.data), param.size())
| <class 'torch.FloatTensor'> (20L,)
| <class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
|
| register_backward_hook(self, hook)
| Registers a backward hook on the module.
|
| The hook will be called every time the gradients with respect to module
| inputs are computed. The hook should have the following signature::
|
| hook(module, grad_input, grad_output) -> Tensor or None
|
| The :attr:`grad_input` and :attr:`grad_output` may be tuples if the
| module has multiple inputs or outputs. The hook should not modify its
| arguments, but it can optionally return a new gradient with respect to
| input that will be used in place of :attr:`grad_input` in subsequent
| computations.
|
| This function returns a handle with a method ``handle.remove()``
| that removes the hook from the module.
|
| register_buffer(self, name, tensor)
| Adds a persistent buffer to the module.
|
| This is typically used to register a buffer that should not to be
| considered a model parameter. For example, BatchNorm's ``running_mean``
| is not a parameter, but is part of the persistent state.
|
| Buffers can be accessed as attributes using given names.
|
| Example:
| >>> self.register_buffer('running_mean', torch.zeros(num_features))
|
| register_forward_hook(self, hook)
| Registers a forward hook on the module.
|
| The hook will be called every time :func:`forward` computes an output.
| It should have the following signature::
|
| hook(module, input, output) -> None
|
| The hook should not modify the input or output.
| This function returns a handle with a method ``handle.remove()``
| that removes the hook from the module.
|
| register_parameter(self, name, param)
| Adds a parameter to the module.
|
| The parameter can be accessed as an attribute using given name.
|
| share_memory(self)
|
| state_dict(self, destination=None, prefix='')
| Returns a dictionary containing a whole state of the module.
|
| Both parameters and persistent buffers (e.g. running averages) are
| included. Keys are corresponding parameter and buffer names.
|
| Example:
| >>> module.state_dict().keys()
| ['bias', 'weight']
|
| train(self, mode=True)
| Sets the module in training mode.
|
| This has any effect only on modules such as Dropout or BatchNorm.
|
| type(self, dst_type)
|
| zero_grad(self)
| Sets gradients of all model parameters to zero.
|
| ----------------------------------------------------------------------
| Data descriptors inherited from torch.nn.modules.module.Module:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from torch.nn.modules.module.Module:
|
| dump_patches = False
In [5]:
mkeys = m.state_dict().keys()
print mkeys
['weight_ih_l0', 'weight_hh_l0']
Content source: lakehanne/ensenso
Similar notebooks: