Acoustics-articulation mapping using TensorFlow RNN

2017-05-11 jkang
Python3.5
TensorFlow1.0.1

Task

  • 음향데이터(acoustics)에서 조음데이터(articulaiton)를 예측하기
    • 인풋: 13-d MFCC acoustics
    • 아웃풋: 24-d articulation

References


In [1]:
import tensorflow as tf
import numpy as np
import pickle
import matplotlib.pyplot as plt

# Input/Output data
data_dir = '/Volumes/Transcend/[DataArchive]/WisconsinArchives/extractTrainingData/training_data_art_mfcc_vowel_noDeriv/female'
with open(data_dir+'/acoustics.pckl', 'rb') as f:
    acoustics = pickle.load(f)
with open(data_dir+'/articulation.pckl', 'rb') as f:
    articulation = pickle.load(f)

In [2]:
n_examples = len(acoustics)
print(n_examples)


13674

In [3]:
# Learning parameters
learning_rate = 0.001
max_iter = 1000

# Network Parameters
n_input_dim = acoustics[0].shape[0]
# n_input_len = char_input.shape[0]
n_output_dim = articulation[0].shape[0]
# n_output_len = char_output.shape[0]
n_hidden = 200
n_examples = len(acoustics)

# TensorFlow graph
# (batch_size) x (time_step) x (input_dimension)
x_data = tf.placeholder(tf.float32, [1, None, n_input_dim])
# (batch_size) x (time_step) x (output_dimension)
y_data = tf.placeholder(tf.float32, [1, None, n_output_dim])

# Parameters
weights = {
    'out': tf.Variable(tf.truncated_normal([n_hidden, n_output_dim]))
}
biases = {
    'out': tf.Variable(tf.truncated_normal([n_output_dim]))
}

In [4]:
def RNN(x, weights, biases):
    cell = tf.contrib.rnn.BasicRNNCell(n_hidden) # Make RNNCell
    outputs, states = tf.nn.dynamic_rnn(cell, x, time_major=False, dtype=tf.float32)
    '''
    **Notes on tf.nn.dynamic_rnn**

    - 'x' can have shape (batch)x(time)x(input_dim), if time_major=False or 
                         (time)x(batch)x(input_dim), if time_major=True
    - 'outputs' can have the same shape as 'x'
                         (batch)x(time)x(input_dim), if time_major=False or 
                         (time)x(batch)x(input_dim), if time_major=True
    - 'states' is the final state, determined by batch and hidden_dim
    '''
    
    # outputs[-1] is outputs for the last example in the mini-batch
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

pred = RNN(x_data, weights, biases)
cost = tf.reduce_mean(tf.squared_difference(pred, y_data))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [5]:
# Learning
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(max_iter):
        for n in range(n_examples):
            x_train = acoustics[n].reshape((1, acoustics[n].shape[1], n_input_dim))
            y_train = articulation[n].reshape((1, articulation[n].shape[1], n_output_dim))
            _, loss, p = sess.run([optimizer, cost, pred],
                                  feed_dict={x_data: x_train, y_data: y_train})
        if (i+1) % 1 == 0:
            print('Epoch:{:>4}/{},'.format(i+1,max_iter),
                  'Cost:{:.4f},'.format(loss))


Epoch:   1/1000, Cost:56.2185,
Epoch:   2/1000, Cost:34.7169,
Epoch:   3/1000, Cost:31.5714,
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-5-b040c9ebd915> in <module>()
      7             y_train = articulation[n].reshape((1, articulation[n].shape[1], n_output_dim))
      8             _, loss, p = sess.run([optimizer, cost, pred],
----> 9                                   feed_dict={x_data: x_train, y_data: y_train})
     10         if (i+1) % 1 == 0:
     11             print('Epoch:{:>4}/{},'.format(i+1,max_iter),

/Users/jaegukang/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/jaegukang/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    938           np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
    939 
--> 940           if not subfeed_t.get_shape().is_compatible_with(np_val.shape):
    941             raise ValueError(
    942                 'Cannot feed value of shape %r for Tensor %r, '

/Users/jaegukang/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in is_compatible_with(self, other)
    716         return False
    717       for x_dim, y_dim in zip(self._dims, other.dims):
--> 718         if not x_dim.is_compatible_with(y_dim):
    719           return False
    720     return True

/Users/jaegukang/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in is_compatible_with(self, other)
     91     other = as_dimension(other)
     92     return (self._value is None
---> 93             or other.value is None
     94             or self._value == other.value)
     95 

/Users/jaegukang/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_shape.py in value(self)
     75   def value(self):
     76     """The value of this dimension, or None if it is unknown."""
---> 77     return self._value
     78 
     79   def is_compatible_with(self, other):

KeyboardInterrupt: