In [1]:
import matplotlib.pyplot as plt
import scipy as sc
import numpy as np

# Load signal used to test stuff
from scipy.io import wavfile

fs, data = wavfile.read("/home/jfsantos/Documents/speech_training.wav")
data = data/2.0**15

In [2]:
%matplotlib inline
plt.plot(data[0:fs*5])


Out[2]:
[<matplotlib.lines.Line2D at 0x31c9490>]

In [3]:
from sparse_coding.sparse_coding_gammatone import gammatone_matrix, erb_space
from scipy.signal import hamming

resolution = 160
step = 8
b = 1.019
n_channels = 50
win = hamming(resolution)

# Compute a Gammatone dictionary
D_multi = np.r_[tuple(gammatone_matrix(b, fc, resolution, step)
                       for fc in erb_space(150, fs/2, n_channels))]

In [4]:
import itertools
# Example of an entry from the dictionary
fig, axes = plt.subplots(6,6)
fig.set_size_inches(16,12)
for i, j in itertools.product(range(1,7), range(6)):
    axes[i-1][j].plot(D_multi[6*(i-1)+j])



In [93]:
from scikits.talkbox import segment_axis

# Get sample speech segment to reconstruct
test_data = win*segment_axis(data[fs*200:fs*210], resolution, overlap=int(.5*resolution))
print test_data.shape

# Reconstruct it frame-by-frame
from sklearn.decomposition import SparseCoder

coder = SparseCoder(dictionary = D_multi, transform_n_nonzero_coefs=None, transform_alpha=10, transform_algorithm="omp")

result = coder.transform(test_data)


(1999, 160)

In [95]:
orig = data[fs*200:fs*210]
out = np.zeros(orig.shape)

for n in range(result.shape[0]):
    idx0 = int(n*resolution*.5)
    out[idx0:idx0+resolution] += np.sum(D_multi.T*result[n],axis=1)

In [94]:
np.mean(sum(result!=0))


Out[94]:
33.667368421052629

In [96]:
fig, axes = plt.subplots(3)
fig.set_size_inches(10,8)

axes[0].plot(orig)
axes[1].plot(out, 'g')
axes[2].plot((out-orig)**2, 'r')


Out[96]:
[<matplotlib.lines.Line2D at 0x82291d0>]

In [97]:
wavfile.write('reconst_gammatone.wav', 16000, np.asarray(out, dtype=np.float32))
#wavfile.write('orig.wav', 16000, np.asarray(orig, dtype=np.float32))

In [98]:
# Test different sparsity values and correspondent outputs

K = [1, 2, 5, 10, 20, 50]
out_k = np.zeros((len(orig),len(K)))

for k in range(len(K)):
    coder = SparseCoder(dictionary = D_multi, transform_n_nonzero_coefs=K[k], transform_alpha=None, transform_algorithm="omp")
    result = coder.transform(test_data)
    for n in range(result.shape[0]):
        idx0 = int(n*resolution*.5)
        out_k[idx0:idx0+resolution,k] += np.sum(D_multi.T*result[n],axis=1)

In [99]:
fig, axes = plt.subplots(len(K)+1)
fig.set_size_inches(10,15)

axes[0].plot(orig)

for k in range(len(K)):
    axes[k+1].plot(out_k[:,k],'g')



In [100]:
plt.specgram(orig)

for k in range(len(K)):
    plt.figure()
    plt.specgram(out_k[:,k])



In [101]:
wavfile.write('reconst_gammatone_k1.wav', 16000, np.asarray(out_k[:,0], dtype=np.float32))

In [ ]: