notebook.community

Edit and run



In [ ]:

    
import torch
import torch.nn as nn
import torch.nn.init as init
from model import RecurrentModel
reg = RecurrentModel(inputSize=4096, nHidden=[4096,1024, 64*32], \
                     noutputs=64*32, batchSize=1, ship2gpu=True, \
                     numLayers=1)

# for m in reg.modules(): 
#     if(isinstance(m, nn.LSTM)): 
#         mkeys = m.state_dict().keys()
#         mvals = m.state_dict().values()
#         init.uniform(mvals[1], 0, 1)
#         print(mvals[1])
"""
for m in reg.lstm3.modules(): 
    for t in m.state_dict().values():
        print init.uniform(t, 0, 1)
"""

for m in reg.modules():
    if (isinstance, nn.LSTM):
        keys = m.state_dict().keys()
        print(keys)
        print lstm1.weight_ih_l0
#weight_ih_l









    



<bound method OrderedDict.keys of OrderedDict([('lstm1.weight_ih_l0', 
-8.3120e-03  1.2229e-02  3.6626e-03  ...   1.0745e-02 -8.7211e-03 -1.5729e-03
 1.1770e-02  8.0955e-03 -8.6257e-03  ...   1.5129e-02  7.3832e-03  1.3534e-02
-1.0262e-02  7.2235e-03 -9.3444e-03  ...  -1.0786e-02 -5.3581e-03 -1.0654e-02
                ...                   ⋱                   ...                
 8.9649e-03  5.1380e-03  3.0401e-03  ...   7.2392e-03 -1.4466e-02  1.1002e-02
-1.3891e-02  1.4393e-02 -9.4514e-03  ...   1.5096e-02  8.0308e-03  6.3942e-04
-4.8369e-03  5.4814e-03  2.7434e-03  ...  -1.0601e-03  1.3128e-02 -1.0100e-02
[torch.DoubleTensor of size 16384x4096]
), ('lstm1.weight_hh_l0', 
 9.0681e-03  8.3512e-03  1.3259e-02  ...   7.6777e-03 -1.2583e-02  6.7293e-03
 1.0038e-02 -5.6466e-03 -3.2015e-03  ...   5.7920e-03 -9.6507e-03 -9.7868e-03
-1.2772e-02 -1.2567e-02 -3.3892e-03  ...  -6.2676e-03  1.0279e-02  4.5665e-03
                ...                   ⋱                   ...                
-7.3647e-03  8.1241e-03 -8.5932e-03  ...  -1.0691e-02  1.0714e-02 -6.9571e-03
-1.0909e-02  4.1533e-03 -7.1867e-03  ...  -1.3236e-02 -6.4171e-03  1.5319e-03
 7.5106e-03  6.4188e-03  3.8898e-03  ...   7.8048e-03  1.1788e-02 -2.3650e-04
[torch.DoubleTensor of size 16384x4096]
), ('lstm2.weight_ih_l0', 
-2.5146e-03  2.1260e-02 -2.9394e-02  ...  -3.1298e-03  2.5282e-02 -7.5236e-03
 2.5535e-03  1.6201e-02 -9.2805e-03  ...   3.4387e-03 -2.7266e-02  4.8494e-03
 1.6374e-03  2.8726e-02 -2.4216e-02  ...   1.6411e-02  1.8289e-02  1.8194e-02
                ...                   ⋱                   ...                
-1.9026e-03  6.2290e-03 -2.7997e-02  ...   1.4752e-02 -8.5351e-03 -1.3339e-02
 2.0920e-02  5.8202e-03  6.1783e-03  ...   1.2403e-02  3.0006e-02 -7.8444e-03
-2.6174e-03 -5.8558e-04  2.1114e-02  ...  -1.4445e-02  2.4024e-02 -2.5441e-02
[torch.DoubleTensor of size 4096x4096]
), ('lstm2.weight_hh_l0', 
 2.6991e-02 -2.5666e-02 -1.3833e-02  ...   1.0607e-02  2.4720e-02 -1.0858e-02
-6.1580e-03  2.6632e-02 -1.4935e-02  ...  -6.6396e-03  1.1162e-04 -1.6596e-03
 2.4643e-02 -1.5914e-02  3.0812e-02  ...  -7.7535e-03  2.1454e-02  9.7609e-03
                ...                   ⋱                   ...                
-3.0703e-02  1.7707e-02  9.1310e-04  ...  -1.0758e-02  3.0963e-02 -2.0343e-02
-2.0886e-02 -6.7411e-03 -2.7328e-02  ...  -1.3424e-02 -9.6237e-05 -1.3324e-02
-2.6809e-02 -5.1521e-03 -2.2948e-02  ...  -8.6834e-03  9.3513e-03 -6.9752e-03
[torch.DoubleTensor of size 4096x1024]
), ('lstm3.weight_ih_l0', 
-4.0340e-03  4.1465e-04 -1.5683e-02  ...  -1.1026e-02 -1.0947e-02  1.6227e-02
 1.7910e-02  5.4558e-03 -1.9902e-03  ...  -1.4846e-02 -1.8834e-02 -5.4759e-03
-1.7049e-02 -1.3265e-02  9.0059e-04  ...   1.3888e-02 -1.3095e-02  6.8316e-03
                ...                   ⋱                   ...                
 1.6905e-02  1.0512e-02 -1.9430e-02  ...   1.2730e-02 -2.1872e-02  1.3590e-03
-1.8652e-02  5.4559e-03 -4.5743e-03  ...   1.1046e-02 -1.1336e-02  1.3728e-02
-9.3221e-03 -1.7549e-02  2.4111e-03  ...  -2.1925e-02  3.6455e-03  4.9774e-03
[torch.DoubleTensor of size 8192x1024]
), ('lstm3.weight_hh_l0', 
-2.1804e-02 -1.7305e-02 -1.2676e-02  ...   8.5824e-03 -3.9467e-03  4.3354e-03
-3.5676e-03  8.7299e-03  1.7373e-02  ...  -1.6218e-02  6.6952e-03 -8.8986e-03
 2.2167e-03 -3.0886e-03 -1.1764e-02  ...   3.9540e-03  1.2533e-02  4.8047e-03
                ...                   ⋱                   ...                
 1.1770e-02  6.7463e-03  2.8238e-03  ...  -1.1753e-02  1.2864e-02  1.4006e-02
-1.0686e-02 -1.8376e-02  7.6939e-03  ...   1.5753e-02  7.8034e-03  5.5767e-03
-1.1706e-02 -1.0713e-02  1.1022e-02  ...   6.5962e-03  1.8819e-02 -2.0634e-02
[torch.DoubleTensor of size 8192x2048]
)])>
<bound method OrderedDict.keys of OrderedDict()>



In [4]:

    
help(m)









    



Help on LSTM in module torch.nn.modules.rnn object:

class LSTM(RNNBase)
 |  Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
 |  
 |  
 |  For each element in the input sequence, each layer computes the following
 |  function:
 |  
 |  .. math::
 |  
 |          \begin{array}{ll}
 |          i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
 |          f_t = sigmoid(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
 |          g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hc} h_{(t-1)} + b_{hg}) \\
 |          o_t = sigmoid(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
 |          c_t = f_t * c_{(t-1)} + i_t * g_t \\
 |          h_t = o_t * \tanh(c_t)
 |          \end{array}
 |  
 |  where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`,
 |  :math:`x_t` is the hidden state of the previous layer at time `t` or :math:`input_t` for the first layer,
 |  and :math:`i_t`, :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget,
 |  cell, and out gates, respectively.
 |  
 |  Args:
 |      input_size: The number of expected features in the input x
 |      hidden_size: The number of features in the hidden state h
 |      num_layers: Number of recurrent layers.
 |      bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True
 |      batch_first: If True, then the input and output tensors are provided as (batch, seq, feature)
 |      dropout: If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer
 |      bidirectional: If True, becomes a bidirectional RNN. Default: False
 |  
 |  Inputs: input, (h_0, c_0)
 |      - **input** (seq_len, batch, input_size): tensor containing the features of the input sequence.
 |        The input can also be a packed variable length sequence. See :func:`torch.nn.utils.rnn.pack_padded_sequence`
 |        for details.
 |      - **h_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
 |        the initial hidden state for each element in the batch.
 |      - **c_0** (num_layers \* num_directions, batch, hidden_size): tensor containing
 |        the initial cell state for each element in the batch.
 |  
 |  
 |  Outputs: output, (h_n, c_n)
 |      - **output** (seq_len, batch, hidden_size * num_directions): tensor containing
 |        the output features `(h_t)` from the last layer of the RNN, for each t. If a
 |        :class:`torch.nn.utils.rnn.PackedSequence` has been given as the input, the output will also be a
 |        packed sequence.
 |      - **h_n** (num_layers * num_directions, batch, hidden_size): tensor containing the hidden state for t=seq_len
 |      - **c_n** (num_layers * num_directions, batch, hidden_size): tensor containing the cell state for t=seq_len
 |  
 |  Attributes:
 |      weight_ih_l[k] : the learnable input-hidden weights of the k-th layer `(W_ii|W_if|W_ig|W_io)`, of shape
 |                       `(input_size x 4*hidden_size)`
 |      weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer `(W_hi|W_hf|W_hg|W_ho)`, of shape
 |                       `(hidden_size x 4*hidden_size)`
 |      bias_ih_l[k] : the learnable input-hidden bias of the k-th layer `(b_ii|b_if|b_ig|b_io)`, of shape
 |                       `(4*hidden_size)`
 |      bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer `(W_hi|W_hf|W_hg|b_ho)`, of shape
 |                       `(4*hidden_size)`
 |  
 |  Examples::
 |  
 |      >>> rnn = nn.LSTM(10, 20, 2)
 |      >>> input = Variable(torch.randn(5, 3, 10))
 |      >>> h0 = Variable(torch.randn(2, 3, 20))
 |      >>> c0 = Variable(torch.randn(2, 3, 20))
 |      >>> output, hn = rnn(input, (h0, c0))
 |  
 |  Method resolution order:
 |      LSTM
 |      RNNBase
 |      torch.nn.modules.module.Module
 |      __builtin__.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, *args, **kwargs)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from RNNBase:
 |  
 |  __repr__(self)
 |  
 |  __setstate__(self, d)
 |  
 |  forward(self, input, hx=None)
 |  
 |  reset_parameters(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from RNNBase:
 |  
 |  all_weights
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from torch.nn.modules.module.Module:
 |  
 |  __call__(self, *input, **kwargs)
 |  
 |  __delattr__(self, name)
 |  
 |  __dir__(self)
 |  
 |  __getattr__(self, name)
 |  
 |  __setattr__(self, name, value)
 |  
 |  add_module(self, name, module)
 |      Adds a child module to the current module.
 |      
 |      The module can be accessed as an attribute using the given name.
 |  
 |  apply(self, fn)
 |  
 |  children(self)
 |      Returns an iterator over immediate children modules.
 |  
 |  cpu(self, device_id=None)
 |      Moves all model parameters and buffers to the CPU.
 |  
 |  cuda(self, device_id=None)
 |      Moves all model parameters and buffers to the GPU.
 |      
 |      Arguments:
 |          device_id (int, optional): if specified, all parameters will be
 |              copied to that device
 |  
 |  double(self)
 |      Casts all parameters and buffers to double datatype.
 |  
 |  eval(self)
 |      Sets the module in evaluation mode.
 |      
 |      This has any effect only on modules such as Dropout or BatchNorm.
 |  
 |  float(self)
 |      Casts all parameters and buffers to float datatype.
 |  
 |  half(self)
 |      Casts all parameters and buffers to half datatype.
 |  
 |  load_state_dict(self, state_dict)
 |      Copies parameters and buffers from :attr:`state_dict` into
 |      this module and its descendants. The keys of :attr:`state_dict` must
 |      exactly match the keys returned by this module's :func:`state_dict()`
 |      function.
 |      
 |      Arguments:
 |          state_dict (dict): A dict containing parameters and
 |              persistent buffers.
 |  
 |  modules(self)
 |      Returns an iterator over all modules in the network.
 |      
 |      Note:
 |          Duplicate modules are returned only once. In the following
 |          example, ``l`` will be returned only once.
 |      
 |          >>> l = nn.Linear(2, 2)
 |          >>> net = nn.Sequential(l, l)
 |          >>> for idx, m in enumerate(net.modules()):
 |          >>>     print(idx, '->', m)
 |          0 -> Sequential (
 |            (0): Linear (2 -> 2)
 |            (1): Linear (2 -> 2)
 |          )
 |          1 -> Linear (2 -> 2)
 |  
 |  named_children(self)
 |      Returns an iterator over immediate children modules, yielding both
 |      the name of the module as well as the module itself.
 |      
 |      Example:
 |          >>> for name, module in model.named_children():
 |          >>>     if name in ['conv4', 'conv5']:
 |          >>>         print(module)
 |  
 |  named_modules(self, memo=None, prefix='')
 |      Returns an iterator over all modules in the network, yielding
 |      both the name of the module as well as the module itself.
 |      
 |      Note:
 |          Duplicate modules are returned only once. In the following
 |          example, ``l`` will be returned only once.
 |      
 |          >>> l = nn.Linear(2, 2)
 |          >>> net = nn.Sequential(l, l)
 |          >>> for idx, m in enumerate(net.named_modules()):
 |          >>>     print(idx, '->', m)
 |          0 -> ('', Sequential (
 |            (0): Linear (2 -> 2)
 |            (1): Linear (2 -> 2)
 |          ))
 |          1 -> ('0', Linear (2 -> 2))
 |  
 |  named_parameters(self, memo=None, prefix='')
 |      Returns an iterator over module parameters, yielding both the
 |      name of the parameter as well as the parameter itself
 |      
 |      Example:
 |          >>> for name, param in self.named_parameters():
 |          >>>    if name in ['bias']:
 |          >>>        print(param.size())
 |  
 |  parameters(self, memo=None)
 |      Returns an iterator over module parameters.
 |      
 |      This is typically passed to an optimizer.
 |      
 |      Example:
 |          >>> for param in model.parameters():
 |          >>>     print(type(param.data), param.size())
 |          <class 'torch.FloatTensor'> (20L,)
 |          <class 'torch.FloatTensor'> (20L, 1L, 5L, 5L)
 |  
 |  register_backward_hook(self, hook)
 |      Registers a backward hook on the module.
 |      
 |      The hook will be called every time the gradients with respect to module
 |      inputs are computed. The hook should have the following signature::
 |      
 |          hook(module, grad_input, grad_output) -> Tensor or None
 |      
 |      The :attr:`grad_input` and :attr:`grad_output` may be tuples if the
 |      module has multiple inputs or outputs. The hook should not modify its
 |      arguments, but it can optionally return a new gradient with respect to
 |      input that will be used in place of :attr:`grad_input` in subsequent
 |      computations.
 |      
 |      This function returns a handle with a method ``handle.remove()``
 |      that removes the hook from the module.
 |  
 |  register_buffer(self, name, tensor)
 |      Adds a persistent buffer to the module.
 |      
 |      This is typically used to register a buffer that should not to be
 |      considered a model parameter. For example, BatchNorm's ``running_mean``
 |      is not a parameter, but is part of the persistent state.
 |      
 |      Buffers can be accessed as attributes using given names.
 |      
 |      Example:
 |          >>> self.register_buffer('running_mean', torch.zeros(num_features))
 |  
 |  register_forward_hook(self, hook)
 |      Registers a forward hook on the module.
 |      
 |      The hook will be called every time :func:`forward` computes an output.
 |      It should have the following signature::
 |      
 |          hook(module, input, output) -> None
 |      
 |      The hook should not modify the input or output.
 |      This function returns a handle with a method ``handle.remove()``
 |      that removes the hook from the module.
 |  
 |  register_parameter(self, name, param)
 |      Adds a parameter to the module.
 |      
 |      The parameter can be accessed as an attribute using given name.
 |  
 |  share_memory(self)
 |  
 |  state_dict(self, destination=None, prefix='')
 |      Returns a dictionary containing a whole state of the module.
 |      
 |      Both parameters and persistent buffers (e.g. running averages) are
 |      included. Keys are corresponding parameter and buffer names.
 |      
 |      Example:
 |          >>> module.state_dict().keys()
 |          ['bias', 'weight']
 |  
 |  train(self, mode=True)
 |      Sets the module in training mode.
 |      
 |      This has any effect only on modules such as Dropout or BatchNorm.
 |  
 |  type(self, dst_type)
 |  
 |  zero_grad(self)
 |      Sets gradients of all model parameters to zero.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from torch.nn.modules.module.Module:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes inherited from torch.nn.modules.module.Module:
 |  
 |  dump_patches = False



In [5]:

    
mkeys = m.state_dict().keys()
print mkeys









    



['weight_ih_l0', 'weight_hh_l0']