In [8]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np

In [18]:
def get_nb_params_shape(shape):
    # Multiply out every dimension of a variable's shape to get its element count.
    nb_params = 1
    for dim in shape:
        nb_params = nb_params * int(dim)
    return nb_params

def count_number_trainable_params():
    tot_nb_params = 0
    for trainable_variable in slim.get_trainable_variables():
        print(trainable_variable.name, trainable_variable.shape)
        shape = trainable_variable.get_shape()  # e.g. [k_h, k_w, c_in, c_out] for conv kernels, [in_dim, out_dim] for dense weights
        current_nb_params = get_nb_params_shape(shape)
        tot_nb_params = tot_nb_params + current_nb_params
    print('Total number of trainable params: ', tot_nb_params)
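
The GRU's weight matrices scale with the size of the flattened CNN output that feeds it, so it is useful to predict their size analytically. Here is a minimal sketch (the gru_param_count helper is hypothetical, not part of the models below), assuming the variable layout that tf.contrib.rnn.GRUCell creates: one fused "gates" weight matrix for the reset and update gates and one "candidate" matrix, each with a bias, which is exactly the set of shapes printed by count_number_trainable_params.

def gru_param_count(input_size, num_units):
    # Fused reset + update gates: (input_size + num_units) x 2*num_units weights, plus 2*num_units biases.
    gates = (input_size + num_units) * 2 * num_units + 2 * num_units
    # Candidate state: (input_size + num_units) x num_units weights, plus num_units biases.
    candidate = (input_size + num_units) * num_units + num_units
    return gates + candidate

# e.g. gru_param_count(144768, 100) == 43460700, the GRU of the first model below.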

In [11]:
is_training = True

Current model


In [19]:
tf.reset_default_graph()
with tf.Graph().as_default():
    x = tf.placeholder(dtype=tf.float32, shape=(50, 240, 320, 3), name="input")
    y = tf.placeholder(dtype=tf.int32, shape=(1,), name='labels')

    with slim.arg_scope([slim.conv2d], stride=1, weights_initializer=tf.contrib.layers.xavier_initializer_conv2d()):
        with tf.variable_scope('Convolution', values=[x]):
            conv1 = slim.conv2d(x, 32, [1, 1], stride=2, scope='Conv1', normalizer_fn=slim.batch_norm,
                                normalizer_params={'is_training': is_training})
            pool1 = slim.max_pool2d(conv1, [3, 3], scope='Pool1', stride=1)
            conv2 = slim.conv2d(pool1, 32, [3, 3], scope='Conv2')
            pool2 = slim.max_pool2d(conv2, [3, 3], scope='Pool2', stride=1)
            conv3 = slim.conv2d(pool2, 32, [3, 3], stride=2, scope='Conv3')
    size = np.prod(conv3.get_shape().as_list()[1:])

    with tf.variable_scope('GRU_RNN_cell'):
        rnn_inputs = tf.reshape(conv3, (-1, 50, size))
        cell = tf.contrib.rnn.GRUCell(100)
        init_state = cell.zero_state(1, dtype=tf.float32)
        rnn_outputs, _ = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
        output = tf.reduce_mean(rnn_outputs, axis=1)

    with tf.name_scope('Dense'):
        logits = slim.fully_connected(output, 6, scope="Fully-connected")
    count_number_trainable_params()


Convolution/Conv1/weights:0 (1, 1, 3, 32)
Convolution/Conv1/BatchNorm/beta:0 (32,)
Convolution/Conv2/weights:0 (3, 3, 32, 32)
Convolution/Conv2/biases:0 (32,)
Convolution/Conv3/weights:0 (3, 3, 32, 32)
Convolution/Conv3/biases:0 (32,)
GRU_RNN_cell/rnn/gru_cell/gates/weights:0 (144868, 200)
GRU_RNN_cell/rnn/gru_cell/gates/biases:0 (200,)
GRU_RNN_cell/rnn/gru_cell/candidate/weights:0 (144868, 100)
GRU_RNN_cell/rnn/gru_cell/candidate/biases:0 (100,)
Fully-connected/weights:0 (100, 6)
Fully-connected/biases:0 (6,)
Total number of trainable params:  43479930
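
The GRU dominates this model: its input is the flattened conv3 output (144868 - 100 = 144768 values per frame, per the printed gate shape), so the gates and candidate matrices alone account for 144868*200 + 200 + 144868*100 + 100 = 43,460,700 parameters, while the three conv layers, the batch-norm beta and the final dense layer add only about 19 thousand.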

Inception model


In [20]:
tf.reset_default_graph()
with tf.Graph().as_default():

    inputs = tf.placeholder(dtype=tf.float32, shape=(50, 240, 320, 3), name="input")
    y = tf.placeholder(dtype=tf.int32, shape=(1,), name='labels')

    # inputs = slim.conv2d(x, 32, [3, 3], scope='Conv2d_0a_1x1', stride=2, padding='VALID')

    conv1 = slim.conv2d(inputs, 32, [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3')
    conv2 = slim.conv2d(conv1, 32, [3, 3], padding='VALID', scope='Conv2d_2a_3x3')
    inc_inputs = slim.conv2d(conv2, 64, [3, 3], scope='Conv2d_2b_3x3')

    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], stride=1, padding='SAME'):
        with tf.variable_scope('BlockInceptionA', values=[inc_inputs]):
            with tf.variable_scope('IBranch_0'):
                ibranch_0 = slim.conv2d(inc_inputs, 96, [1, 1], scope='IConv2d_0a_1x1')
            with tf.variable_scope('IBranch_1'):
                ibranch_1_conv1 = slim.conv2d(inc_inputs, 64, [1, 1], scope='IConv2d_0a_1x1')
                ibranch_1 = slim.conv2d(ibranch_1_conv1, 96, [3, 3], scope='IConv2d_0b_3x3')
            with tf.variable_scope('IBranch_2'):
                ibranch_2_conv1 = slim.conv2d(inc_inputs, 64, [1, 1], scope='IConv2d_0a_1x1')
                ibranch_2_conv2 = slim.conv2d(ibranch_2_conv1, 96, [3, 3], scope='IConv2d_0b_3x3')
                ibranch_2 = slim.conv2d(ibranch_2_conv2, 96, [3, 3], scope='IConv2d_0c_3x3')
            with tf.variable_scope('IBranch_3'):
                ibranch_3_pool = slim.avg_pool2d(inc_inputs, [3, 3], scope='IAvgPool_0a_3x3')
                ibranch_3 = slim.conv2d(ibranch_3_pool, 96, [1, 1], scope='IConv2d_0b_1x1')
            inception = tf.concat(axis=3, values=[ibranch_0, ibranch_1, ibranch_2, ibranch_3])

    with tf.variable_scope('BlockReductionA', values=[inception]):
        with tf.variable_scope('RBranch_0'):
            rbranch_0 = slim.conv2d(inception, 384, [3, 3], stride=2, padding='VALID', scope='RConv2d_1a_3x3')
        with tf.variable_scope('RBranch_1'):
            rbranch_1_conv1 = slim.conv2d(inception, 192, [1, 1], scope='RConv2d_0a_1x1')
            rbranch_1_conv2 = slim.conv2d(rbranch_1_conv1, 224, [3, 3], scope='RConv2d_0b_3x3')
            rbranch_1 = slim.conv2d(rbranch_1_conv2, 256, [3, 3], stride=2, padding='VALID', scope='RConv2d_1a_3x3')
        with tf.variable_scope('RBranch_2'):
            rbranch_2 = slim.max_pool2d(inception, [3, 3], stride=2, padding='VALID', scope='RMaxPool_1a_3x3')
        reduction = tf.concat(axis=3, values=[rbranch_0, rbranch_1, rbranch_2])

    size = np.prod(reduction.get_shape().as_list()[1:])

    with tf.variable_scope('GRU_RNN_cell'):
        rnn_inputs = tf.reshape(reduction, (-1, 50, size))
        cell = tf.contrib.rnn.GRUCell(100)
        init_state = cell.zero_state(1, dtype=tf.float32)
        rnn_outputs, _ = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
        output = tf.reduce_mean(rnn_outputs, axis=1)

    with tf.name_scope('Dense'):
        logits = slim.fully_connected(output, 6, scope="Fully-connected")
    count_number_trainable_params()


Conv2d_1a_3x3/weights:0 (3, 3, 3, 32)
Conv2d_1a_3x3/biases:0 (32,)
Conv2d_2a_3x3/weights:0 (3, 3, 32, 32)
Conv2d_2a_3x3/biases:0 (32,)
Conv2d_2b_3x3/weights:0 (3, 3, 32, 64)
Conv2d_2b_3x3/biases:0 (64,)
BlockInceptionA/IBranch_0/IConv2d_0a_1x1/weights:0 (1, 1, 64, 96)
BlockInceptionA/IBranch_0/IConv2d_0a_1x1/biases:0 (96,)
BlockInceptionA/IBranch_1/IConv2d_0a_1x1/weights:0 (1, 1, 64, 64)
BlockInceptionA/IBranch_1/IConv2d_0a_1x1/biases:0 (64,)
BlockInceptionA/IBranch_1/IConv2d_0b_3x3/weights:0 (3, 3, 64, 96)
BlockInceptionA/IBranch_1/IConv2d_0b_3x3/biases:0 (96,)
BlockInceptionA/IBranch_2/IConv2d_0a_1x1/weights:0 (1, 1, 64, 64)
BlockInceptionA/IBranch_2/IConv2d_0a_1x1/biases:0 (64,)
BlockInceptionA/IBranch_2/IConv2d_0b_3x3/weights:0 (3, 3, 64, 96)
BlockInceptionA/IBranch_2/IConv2d_0b_3x3/biases:0 (96,)
BlockInceptionA/IBranch_2/IConv2d_0c_3x3/weights:0 (3, 3, 96, 96)
BlockInceptionA/IBranch_2/IConv2d_0c_3x3/biases:0 (96,)
BlockInceptionA/IBranch_3/IConv2d_0b_1x1/weights:0 (1, 1, 64, 96)
BlockInceptionA/IBranch_3/IConv2d_0b_1x1/biases:0 (96,)
BlockReductionA/RBranch_0/RConv2d_1a_3x3/weights:0 (3, 3, 384, 384)
BlockReductionA/RBranch_0/RConv2d_1a_3x3/biases:0 (384,)
BlockReductionA/RBranch_1/RConv2d_0a_1x1/weights:0 (1, 1, 384, 192)
BlockReductionA/RBranch_1/RConv2d_0a_1x1/biases:0 (192,)
BlockReductionA/RBranch_1/RConv2d_0b_3x3/weights:0 (3, 3, 192, 224)
BlockReductionA/RBranch_1/RConv2d_0b_3x3/biases:0 (224,)
BlockReductionA/RBranch_1/RConv2d_1a_3x3/weights:0 (3, 3, 224, 256)
BlockReductionA/RBranch_1/RConv2d_1a_3x3/biases:0 (256,)
GRU_RNN_cell/rnn/gru_cell/gates/weights:0 (4632676, 200)
GRU_RNN_cell/rnn/gru_cell/gates/biases:0 (200,)
GRU_RNN_cell/rnn/gru_cell/candidate/weights:0 (4632676, 100)
GRU_RNN_cell/rnn/gru_cell/candidate/biases:0 (100,)
Fully-connected/weights:0 (100, 6)
Fully-connected/biases:0 (6,)
Total number of trainable params:  1392352026
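
The GRU again dominates: the flattened reduction output is 4632676 - 100 = 4,632,576 values per frame, giving 4632676*200 + 200 + 4632676*100 + 100 = 1,389,803,100 parameters in the GRU alone, while the stem, Inception-A and Reduction-A blocks together add only about 2.5 million.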

VGG16


In [23]:
tf.reset_default_graph()
with tf.Graph().as_default():
    inputs = tf.placeholder(dtype=tf.float32, shape=(50, 240, 320, 3), name="input")
    y = tf.placeholder(dtype=tf.int32, shape=(1,), name='labels')
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                      weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.5, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.5, scope='dropout7')
        net = slim.fully_connected(net, 1000, activation_fn=None, scope='fc8')
    
    size = np.prod(net.get_shape().as_list()[1:])

    with tf.variable_scope('GRU_RNN_cell'):
        rnn_inputs = tf.reshape(net, (-1, 50, size))
        cell = tf.contrib.rnn.GRUCell(100)
        init_state = cell.zero_state(1, dtype=tf.float32)
        rnn_outputs, _ = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
        output = tf.reduce_mean(rnn_outputs, axis=1)

    with tf.name_scope('Dense'):
        logits = slim.fully_connected(output, 6, scope="Fully-connected")
    count_number_trainable_params()


conv1/conv1_1/weights:0 (3, 3, 3, 64)
conv1/conv1_1/biases:0 (64,)
conv1/conv1_2/weights:0 (3, 3, 64, 64)
conv1/conv1_2/biases:0 (64,)
conv2/conv2_1/weights:0 (3, 3, 64, 128)
conv2/conv2_1/biases:0 (128,)
conv2/conv2_2/weights:0 (3, 3, 128, 128)
conv2/conv2_2/biases:0 (128,)
conv3/conv3_1/weights:0 (3, 3, 128, 256)
conv3/conv3_1/biases:0 (256,)
conv3/conv3_2/weights:0 (3, 3, 256, 256)
conv3/conv3_2/biases:0 (256,)
conv3/conv3_3/weights:0 (3, 3, 256, 256)
conv3/conv3_3/biases:0 (256,)
conv4/conv4_1/weights:0 (3, 3, 256, 512)
conv4/conv4_1/biases:0 (512,)
conv4/conv4_2/weights:0 (3, 3, 512, 512)
conv4/conv4_2/biases:0 (512,)
conv4/conv4_3/weights:0 (3, 3, 512, 512)
conv4/conv4_3/biases:0 (512,)
conv5/conv5_1/weights:0 (3, 3, 512, 512)
conv5/conv5_1/biases:0 (512,)
conv5/conv5_2/weights:0 (3, 3, 512, 512)
conv5/conv5_2/biases:0 (512,)
conv5/conv5_3/weights:0 (3, 3, 512, 512)
conv5/conv5_3/biases:0 (512,)
fc6/weights:0 (512, 4096)
fc6/biases:0 (4096,)
fc7/weights:0 (4096, 4096)
fc7/biases:0 (4096,)
fc8/weights:0 (4096, 1000)
fc8/biases:0 (1000,)
GRU_RNN_cell/rnn/gru_cell/gates/weights:0 (70100, 200)
GRU_RNN_cell/rnn/gru_cell/gates/biases:0 (200,)
GRU_RNN_cell/rnn/gru_cell/candidate/weights:0 (70100, 100)
GRU_RNN_cell/rnn/gru_cell/candidate/biases:0 (100,)
Fully-connected/weights:0 (100, 6)
Fully-connected/biases:0 (6,)
Total number of trainable params:  58725154
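
The balance is different here: pool5 leaves a 7x10x512 map and, as the printed fc6 weight shape (512, 4096) shows, slim.fully_connected applied to this 4D tensor acts on the channel dimension rather than flattening it, so fc8 produces a 7x10x1000 map and the GRU sees only 70,000 values per frame. The GRU therefore contributes 70100*200 + 200 + 70100*100 + 100 = 21,030,300 parameters, with the remaining roughly 37.7 million coming from the VGG16 trunk and its fc6-fc8 layers.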