In [1]:
# TensorFlow 1.x APIs: tf.contrib.rnn and InteractiveSession are not available in TF 2.x.
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.python.ops import ctc_ops as ctc
import pprint
sess = tf.InteractiveSession()
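If this notebook is run under TensorFlow 2.x instead of the 1.x environment assumed above, a compat shim along these lines might be needed (this is an assumption, not part of the original setup; tf.contrib is gone in TF 2.x, so tf.nn.rnn_cell stands in for tensorflow.contrib.rnn):
# Hypothetical TF 2.x fallback; the original cells assume TF 1.x as imported above.
import tensorflow.compat.v1 as tf
import numpy as np
tf.disable_v2_behavior()
rnn = tf.nn.rnn_cell   # BasicLSTMCell lives here instead of tensorflow.contrib.rnn
ctc = tf.nn            # tf.compat.v1.nn provides ctc_loss and ctc_beam_search_decoder
sess = tf.InteractiveSession()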
In [12]:
# Dummy inputs: 6 examples, each 1000 time steps of 48 features.
x_train = np.arange(288000, dtype=np.float32).reshape(6, 1000, 48)
# Dummy stand-ins for one-hot label matrices: one (label_length, 96) array per example,
# with varying label lengths.
y0 = np.arange(47*96, dtype=np.float32).reshape(47, 96)
y1 = np.arange(49*96, dtype=np.float32).reshape(49, 96)
y2 = np.arange(54*96, dtype=np.float32).reshape(54, 96)
y3 = np.arange(42*96, dtype=np.float32).reshape(42, 96)
y4 = np.arange(57*96, dtype=np.float32).reshape(57, 96)
y5 = np.arange(50*96, dtype=np.float32).reshape(50, 96)
a = [y0, y1, y2, y3, y4, y5]
# The label matrices have different lengths, so this becomes a ragged object array;
# recent NumPy versions require dtype=object to be stated explicitly.
y_train = np.asarray(a, dtype=object)
print(x_train.shape)
print(y_train.shape)
print(type(x_train[0]))
print(y_train[0].shape)
In [18]:
x = len(y_train)
#print(x)
target_idx = []       # (batch, position) index pairs, one per label
target_values = []    # the raw one-hot rows
target_values2 = []   # dense class-index sequences, one array per example
max_x = 0
for ii, i in enumerate(y_train):
    x, y = i.shape
    #print(x, y)
    # Collapse each one-hot row to its class index.
    target_values2.append(np.array([label.argmax() for label in y_train[ii]]))
    for p in range(x):
        target_idx.append((ii, p))
        target_values.append(y_train[ii][p])
print(target_values2)
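The CTC loss used later expects its labels as a tf.SparseTensor. A minimal sketch of building one from the target_idx / target_values2 lists collected above (the names indices, values, dense_shape, ctc_targets are introduced here for illustration; note that with the arange dummy labels every argmax collapses to 95, so real one-hot targets are assumed):
# Sketch: pack the per-example class-index sequences into the SparseTensor
# layout that ctc_loss expects (indices are (batch, time) pairs).
indices = np.asarray(target_idx, dtype=np.int64)
values = np.concatenate(target_values2).astype(np.int32)
dense_shape = np.asarray([len(target_values2),
                          max(len(seq) for seq in target_values2)], dtype=np.int64)
ctc_targets = tf.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)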
In [4]:
batch_size = 6
num_epochs = 200
num_lstm_hidden = 512     # units per LSTM direction
learning_rate = 0.01
momentum = 0.9
n_steps = 1000            # time steps per example
num_classes = 96          # output alphabet size; TF's ctc_loss reserves class num_classes - 1 as blank
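The cell below hardcodes the per-example sequence lengths as literal lists; a small sketch of deriving them from the hyperparameters instead (seq_lengths is a name introduced here, not in the original; all six dummy examples are exactly n_steps long):
seq_lengths = np.full(batch_size, n_steps, dtype=np.int32)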
In [5]:
e = 9
In [6]:
with tf.variable_scope('bi-directional') as scope:
    # Dummy CTC targets: arbitrary label sequences, one per example so the batch
    # size matches x_train; in practice these would be built from y_train.
    targets = tf.SparseTensor(
        indices=[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4),
                 (1, 0), (1, 1), (1, 2),
                 (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7),
                 (3, 0), (3, 1),
                 (4, 0), (4, 1), (4, 2),
                 (5, 0)],
        values=[0, 3, 1, 3, 2, 0, 3, 1, 1, 3, 0, 2, 3, 2, 3, 2, 1, 2, 0, 1, 2, 3],
        dense_shape=[6, 8])

    # Bi-directional RNN over the raw x_train array.
    cell_fw = rnn.BasicLSTMCell(num_units=num_lstm_hidden, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=num_lstm_hidden, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, x_train,
        sequence_length=[1000, 1000, 1000, 1000, 1000, 1000], dtype=tf.float32)

    # Concatenate forward and backward outputs, then flatten to (batch*time, features)
    # for the shared projection layer.
    outputs = tf.concat(outputs, 2)
    outputs = tf.reshape(outputs, [-1, 2 * num_lstm_hidden])

    # Weights for the projection (regression) layer.
    W = tf.Variable(tf.truncated_normal([2 * num_lstm_hidden, num_classes], stddev=0.1), name='W')
    b = tf.Variable(tf.constant(0., shape=[num_classes]), name='b')

    # Initialize after all variables (LSTM kernels, W, b) have been created.
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(outputs)))

    # Apply linear transform.
    logits = tf.matmul(outputs, W) + b
    print(sess.run(tf.shape(logits)))

    # Reshape back to (batch, time, classes).
    logits = tf.reshape(logits, [batch_size, -1, num_classes])
    print(sess.run(tf.shape(logits)))

    # Swap dimensions to time major (time, batch, classes) for CTC loss.
    logits = tf.transpose(logits, (1, 0, 2))
    print(sess.run(tf.shape(logits)))
    #print(sess.run(logits))

    #seq_len = tf.placeholder(tf.int32, [None])
    loss = ctc.ctc_loss(targets, logits, [1000, 1000, 1000, 1000, 1000, 1000])
    print(sess.run(tf.shape(loss)))
    print(sess.run(tf.shape(targets)))

    # cost = tf.reduce_mean(loss)
    # # Record the loss
    # tf.scalar_summary('loss', cost)
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
    #                                        momentum=momentum, use_nesterov=True).minimize(cost)
    # decoded, log_prob = ctc.ctc_beam_search_decoder(inputs=logits, sequence_length=seq_len)
    # # Label error rate using the edit distance between output and target
    # ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))
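A sketch of how the commented-out training and decoding lines above could be wired up, assuming the graph built in this cell and a fixed sequence-length list (seq_lengths is introduced here; tf.scalar_summary in the old comment is the pre-1.0 name for tf.summary.scalar):
seq_lengths = [1000] * batch_size

cost = tf.reduce_mean(loss)
tf.summary.scalar('loss', cost)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=momentum,
                                       use_nesterov=True).minimize(cost)
# Beam-search decode the time-major logits and compare to the sparse targets.
decoded, log_prob = ctc.ctc_beam_search_decoder(inputs=logits, sequence_length=seq_lengths)
ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

sess.run(tf.global_variables_initializer())   # the optimizer adds its own slot variables
for epoch in range(num_epochs):
    _, batch_cost, batch_ler = sess.run([optimizer, cost, ler])
    print(epoch, batch_cost, batch_ler)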
In [ ]: