In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram
plt.style.use('ggplot')
%matplotlib inline
In [2]:
# Generating artificial utterances.
# Make a grid of test-tone frequencies: 10 Hz to 2000 Hz in 10 Hz steps (200 tones).
frequencies = np.arange(10, 2010,10)
RATE = int(16 * 1e3) # 16kHz sample rate
LENGTH = 3 # 3 seconds per tone (comment previously said 5, which was wrong)
frequencies
Out[2]:
In [9]:
# NOTE(review): hardcoded absolute AFS path -- only resolves on the university
# filesystem; consider a configurable DATA_DIR for portability.
datadir = '/afs/inf.ed.ac.uk/user/s10/s1003970/diss/waveform-asr/tests/test_data/'
In [12]:
def make_wav(freq, rate=None, length=None):
    """Synthesize a pure cosine tone and write it to ``datadir + 'waves/<freq>.wav'``.

    Parameters
    ----------
    freq : float
        Tone frequency in Hz.
    rate : int, optional
        Sample rate in Hz; defaults to the module-level RATE (16 kHz).
    length : float, optional
        Duration in seconds; defaults to the module-level LENGTH.

    Returns
    -------
    numpy.ndarray
        The float32 waveform that was written to disk.
    """
    rate = RATE if rate is None else rate
    length = LENGTH if length is None else length
    # Sample indices 0 .. rate*length - 1; phase = 2*pi*freq*t with t = n/rate.
    time = np.arange(start=0, stop=rate * length, dtype=np.float32)
    data = np.cos(time * freq * 2 * np.pi / rate).astype(np.float32)
    name = datadir + 'waves/{}.wav'.format(freq)
    wavfile.write(filename=name, data=data, rate=rate)
    return data
In [13]:
# Smoke test: synthesize (and write) a single 100 Hz tone.
data = make_wav(100)
In [14]:
from IPython.display import Audio
# Play back the generated tone. Use the shared RATE constant rather than a
# hard-coded 16000 so playback stays in sync if the synthesis rate changes.
Audio(data, rate=RATE)
Out[14]:
In [15]:
# Power spectral density spectrogram of the tone. Use RATE (not a magic 16000)
# so the frequency axis always matches the synthesis sample rate.
freqs, times, sgram = spectrogram(data, fs=RATE, scaling='density')
sgram.shape, freqs.shape, times.shape
Out[15]:
In [16]:
# Heatmap of the raw spectrogram (frequency bins x time frames).
fig, ax = plt.subplots(figsize=(16, 5))
image = ax.imshow(sgram, aspect='auto')
fig.colorbar(image, ax=ax)
plt.show()
In [17]:
import librosa
# Mel spectrogram of the tone. librosa >= 0.10 requires keyword arguments here;
# passing the signal positionally raises a TypeError on current versions.
mel = librosa.feature.melspectrogram(y=data, sr=RATE)
plt.figure(figsize=(16, 5))
plt.imshow(np.log(mel), aspect='auto')
plt.colorbar()
plt.show()
In [22]:
# Recall the full frequency grid before generating every tone.
frequencies
Out[22]:
In [18]:
# Write one wav file per frequency in the grid (200 files under datadir/waves/).
for tone_freq in frequencies:
    make_wav(tone_freq)
In [ ]:
cp -r s1003970/diss/waveform-asr/tests/test_data/waves/ metadata/tones/
cp *.npy ~/s1003970/diss/waveform-asr/tests/test_data/tone_reps/
cd diss/waveform-asr/experiments
%run hmm-wavenet.py
In [ ]:
import os

outdir = '/home/s1003970/metadata/tones/'
indir = '/home/s1003970/metadata/tones/waves/'
import tensorflow as tf
from scipy.io import wavfile

# Run every synthetic tone through the network and cache the stack activations
# as .npy files. `network` is presumably created by the %run of hmm-wavenet.py
# above -- TODO confirm; this cell will NameError on a fresh kernel otherwise.
sess = tf.InteractiveSession(graph=network.graph)
sess.run(network.init)
tones = os.listdir(indir)
network.restore_model('final')
for tone in tones:
    print(tone)
    # wavfile.read returns (rate, samples); keep only the samples.
    wav = wavfile.read(indir + tone)[1]
    feats = sess.run(network.stacks_output,
                     feed_dict={network.inputs: wav})
    feats = feats.squeeze()
    np.save(file=outdir + tone, arr=feats)
# NOTE: the original cell repeated the squeeze/save lines a second time after
# the loop (a copy-paste duplicate that just re-saved the last tone); removed.
In [19]:
# Directory holding the cached network representations (<freq>.wav.npy files).
tonedir = datadir + 'tone_reps/'
In [20]:
# List the cached representation files. Note: os.listdir order is arbitrary.
import os; tones = os.listdir(tonedir)
In [21]:
# Show which tone representations are available on disk.
tones
Out[21]:
In [202]:
# Inspect the cached representation of the 10 Hz tone: first the full
# (channel x time) heatmap, then a single time-slice across all channels.
freq = 10
rep = np.load(tonedir + '{}.wav.npy'.format(freq))

fig, ax = plt.subplots(figsize=(16, 5))
heat = ax.imshow(rep.T, aspect='auto')
fig.colorbar(heat, ax=ax)
plt.show()

# Slice at sample index 20000 (well inside the 3 s signal, away from edges).
fig, ax = plt.subplots(figsize=(16, 5))
ax.plot(rep[20000, :])
plt.show()
In [193]:
# Compare the representations of two nearby tones. The originals were
# misleadingly named rep200/rep300 while actually holding the 150 Hz and
# 120 Hz data; renamed to match their contents.
rep150 = np.load(tonedir + '{}.wav.npy'.format(150))
rep120 = np.load(tonedir + '{}.wav.npy'.format(120))

plt.figure(figsize=(16, 5))
plt.imshow(rep150.T, aspect='auto')
plt.colorbar()

plt.figure(figsize=(16, 5))
plt.imshow(rep120.T, aspect='auto')
plt.colorbar()
plt.show()

# Overlay a single time-slice (sample 20000) from each tone for a direct
# channel-by-channel comparison.
plt.figure(figsize=(16, 5))
plt.plot(rep150[20000, :])
plt.plot(rep120[20000, :])
plt.show()
In [203]:
# Number of tones in the grid (rows of the sensitivity matrix below).
len(frequencies)
Out[203]:
In [204]:
# One row per grid frequency, 64 columns -- presumably the channel dimension
# of the cached representations; TODO confirm against the network definition.
senses = np.zeros([len(frequencies), 64])
In [206]:
# Collect the time-slice at sample 20000 from each tone's cached
# representation, one row per frequency. enumerate() replaces the original
# manually-incremented counter, which was easy to desynchronize on re-run.
for i, freq in enumerate(frequencies):
    rep = np.load(tonedir + '{}.wav.npy'.format(freq))
    senses[i, :] = rep[20000, :]
In [209]:
# Sanity check: expected (len(frequencies), 64).
senses.shape
Out[209]:
In [223]:
# Response of a single channel (index 12) as a function of tone frequency.
fig, ax = plt.subplots(figsize=(16, 5))
ax.plot(senses[:, 12])
plt.show()
In [212]:
# Full tuning map: frequency index (rows) vs channel activation at sample 20000.
fig, ax = plt.subplots(figsize=(16, 5))
mappable = ax.imshow(senses, aspect='auto')
fig.colorbar(mappable, ax=ax)
plt.show()
In [ ]:
In [ ]:
In [ ]: