In [1]:
# TensorFlow 1.x APIs: tf.contrib.rnn and InteractiveSession are not available in TF 2.x.
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.python.ops import ctc_ops as ctc
import pprint
sess = tf.InteractiveSession()
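If this notebook is run under TensorFlow 2.x instead of the 1.x environment assumed above, a compat shim along these lines might be needed (this is an assumption, not part of the original setup; tf.contrib is gone in TF 2.x, so tf.nn.rnn_cell stands in for tensorflow.contrib.rnn):
# Hypothetical TF 2.x fallback; the original cells assume TF 1.x as imported above.
import tensorflow.compat.v1 as tf
import numpy as np
tf.disable_v2_behavior()
rnn = tf.nn.rnn_cell   # BasicLSTMCell lives here instead of tensorflow.contrib.rnn
ctc = tf.nn            # tf.compat.v1.nn provides ctc_loss and ctc_beam_search_decoder
sess = tf.InteractiveSession()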
In [12]:
# Dummy inputs: 6 examples, each 1000 time steps of 48 features.
x_train = np.arange(288000, dtype=np.float32).reshape(6, 1000, 48)
# Dummy stand-ins for one-hot label matrices: one (label_length, 96) array per example,
# with varying label lengths.
y0 = np.arange(47*96, dtype=np.float32).reshape(47, 96)
y1 = np.arange(49*96, dtype=np.float32).reshape(49, 96)
y2 = np.arange(54*96, dtype=np.float32).reshape(54, 96)
y3 = np.arange(42*96, dtype=np.float32).reshape(42, 96)
y4 = np.arange(57*96, dtype=np.float32).reshape(57, 96)
y5 = np.arange(50*96, dtype=np.float32).reshape(50, 96)
a = [y0, y1, y2, y3, y4, y5]
# The label matrices have different lengths, so this becomes a ragged object array;
# recent NumPy versions require dtype=object to be stated explicitly.
y_train = np.asarray(a, dtype=object)
print(x_train.shape)
print(y_train.shape)
print(type(x_train[0]))
print(y_train[0].shape)
In [18]:
x = len(y_train)
#print(x)
target_idx = []       # (batch, position) index pairs, one per label
target_values = []    # the raw one-hot rows
target_values2 = []   # dense class-index sequences, one array per example
max_x = 0
for ii, i in enumerate(y_train):
    x, y = i.shape
    #print(x, y)
    # Collapse each one-hot row to its class index.
    target_values2.append(np.array([label.argmax() for label in y_train[ii]]))
    for p in range(x):
        target_idx.append((ii, p))
        target_values.append(y_train[ii][p])
print(target_values2)
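The CTC loss used later expects its labels as a tf.SparseTensor. A minimal sketch of building one from the target_idx / target_values2 lists collected above (the names indices, values, dense_shape, ctc_targets are introduced here for illustration; note that with the arange dummy labels every argmax collapses to 95, so real one-hot targets are assumed):
# Sketch: pack the per-example class-index sequences into the SparseTensor
# layout that ctc_loss expects (indices are (batch, time) pairs).
indices = np.asarray(target_idx, dtype=np.int64)
values = np.concatenate(target_values2).astype(np.int32)
dense_shape = np.asarray([len(target_values2),
                          max(len(seq) for seq in target_values2)], dtype=np.int64)
ctc_targets = tf.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)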
In [4]:
batch_size = 6
num_epochs = 200
num_lstm_hidden = 512     # units per LSTM direction
learning_rate = 0.01
momentum = 0.9
n_steps = 1000            # time steps per example
num_classes = 96          # output alphabet size; TF's ctc_loss reserves class num_classes - 1 as blank
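The cell below hardcodes the per-example sequence lengths as literal lists; a small sketch of deriving them from the hyperparameters instead (seq_lengths is a name introduced here, not in the original; all six dummy examples are exactly n_steps long):
seq_lengths = np.full(batch_size, n_steps, dtype=np.int32)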
In [5]:
e = 9
In [6]:
with tf.variable_scope('bi-directional') as scope:
    # Dummy CTC targets: arbitrary label sequences, one per example so the batch
    # size matches x_train; in practice these would be built from y_train.
    targets = tf.SparseTensor(
        indices=[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4),
                 (1, 0), (1, 1), (1, 2),
                 (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (2, 7),
                 (3, 0), (3, 1),
                 (4, 0), (4, 1), (4, 2),
                 (5, 0)],
        values=[0, 3, 1, 3, 2, 0, 3, 1, 1, 3, 0, 2, 3, 2, 3, 2, 1, 2, 0, 1, 2, 3],
        dense_shape=[6, 8])

    # Bi-directional RNN over the raw x_train array.
    cell_fw = rnn.BasicLSTMCell(num_units=num_lstm_hidden, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=num_lstm_hidden, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, x_train,
        sequence_length=[1000, 1000, 1000, 1000, 1000, 1000], dtype=tf.float32)

    # Concatenate forward and backward outputs, then flatten to (batch*time, features)
    # for the shared projection layer.
    outputs = tf.concat(outputs, 2)
    outputs = tf.reshape(outputs, [-1, 2 * num_lstm_hidden])

    # Weights for the projection (regression) layer.
    W = tf.Variable(tf.truncated_normal([2 * num_lstm_hidden, num_classes], stddev=0.1), name='W')
    b = tf.Variable(tf.constant(0., shape=[num_classes]), name='b')

    # Initialize after all variables (LSTM kernels, W, b) have been created.
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(outputs)))

    # Apply linear transform.
    logits = tf.matmul(outputs, W) + b
    print(sess.run(tf.shape(logits)))

    # Reshape back to (batch, time, classes).
    logits = tf.reshape(logits, [batch_size, -1, num_classes])
    print(sess.run(tf.shape(logits)))

    # Swap dimensions to time major (time, batch, classes) for CTC loss.
    logits = tf.transpose(logits, (1, 0, 2))
    print(sess.run(tf.shape(logits)))
    #print(sess.run(logits))

    #seq_len = tf.placeholder(tf.int32, [None])
    loss = ctc.ctc_loss(targets, logits, [1000, 1000, 1000, 1000, 1000, 1000])
    print(sess.run(tf.shape(loss)))
    print(sess.run(tf.shape(targets)))

    # cost = tf.reduce_mean(loss)
    # # Record the loss
    # tf.scalar_summary('loss', cost)
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
    #                                        momentum=momentum, use_nesterov=True).minimize(cost)
    # decoded, log_prob = ctc.ctc_beam_search_decoder(inputs=logits, sequence_length=seq_len)
    # # Label error rate using the edit distance between output and target
    # ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))
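A sketch of how the commented-out training and decoding lines above could be wired up, assuming the graph built in this cell and a fixed sequence-length list (seq_lengths is introduced here; tf.scalar_summary in the old comment is the pre-1.0 name for tf.summary.scalar):
seq_lengths = [1000] * batch_size

cost = tf.reduce_mean(loss)
tf.summary.scalar('loss', cost)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=momentum,
                                       use_nesterov=True).minimize(cost)
# Beam-search decode the time-major logits and compare to the sparse targets.
decoded, log_prob = ctc.ctc_beam_search_decoder(inputs=logits, sequence_length=seq_lengths)
ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

sess.run(tf.global_variables_initializer())   # the optimizer adds its own slot variables
for epoch in range(num_epochs):
    _, batch_cost, batch_ler = sess.run([optimizer, cost, ler])
    print(epoch, batch_cost, batch_ler)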
In [ ]: