In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy import signal as scisig
import pickle
import sys
sys.path.append('..')
import matchingpursuit
import signalset

In [2]:
sigset = signalset.SignalSet(data='../../Data/speech_corpora/TIMIT/')


Found  6292  files

In [3]:
mp = matchingpursuit.MatchingPursuer(sigset, use_gammachirps=True)
phi = mp.phi

In [5]:
phi /= np.linalg.norm(phi,axis=1)[:,None]

In [6]:
sigset.tiled_plot(phi)



In [12]:
phi.shape


Out[12]:
(32, 800, 1)

In [13]:
phi = phi[:,:,0]

In [17]:
convs = []
for count, signal in enumerate(sigset.data):
    # one 2D FFT convolution: every filter in phi against this signal, i.e.
    # equivalent to [scisig.fftconvolve(elem, signal, mode='full') for elem in phi]
    convs.append(scisig.fftconvolve(phi, signal[None,:], mode='full'))
    if count > 500:
        break
convs = np.concatenate(convs, axis=1)

In [18]:
convs.shape


Out[18]:
(32, 27945377)

In [23]:
plt.figure(figsize=(20,10))
for ii in range(convs.shape[0]):
    histo, edges = np.histogram(convs[ii], bins=25, range=(-1,1), density=True)
    centers = [(edges[kk]+edges[kk+1])/2 for kk in range(len(edges)-1)]
    plt.plot(centers, histo, color=(0,0,ii/32))
    plt.yscale('log')



In [8]:
def expected_minusabs(A, excess=True):
    """Estimate E[-|A|] along the last axis of A.

    If excess, subtract -sqrt(2/pi), which is E[-|X|] for a standard normal X,
    so standardized Gaussian data comes out near zero and sparser
    (heavier-tailed) data comes out positive. Assumes A is already standardized."""
    ans = np.mean(-np.abs(A), axis=-1)
    return (ans + np.sqrt(2/np.pi)) if excess else ans
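
As a quick check of this convention (on synthetic samples, not the convolution data above): standardized Gaussian samples should score near zero, while standardized Laplacian samples, being sparser, should score positive.

In [ ]:
# Sanity check on synthetic data: standardized Gaussian samples should give
# roughly 0, standardized Laplacian samples a positive excess (about 0.09).
g = np.random.randn(10**6)
l = np.random.laplace(size=10**6)
l = (l - l.mean()) / l.std()
expected_minusabs(g), expected_minusabs(l)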

In [26]:
B = convs
# standardize each filter's responses in place (B is the same array as convs)
B -= B.mean(axis=-1)[:,None]
B /= B.std(axis=-1)[:,None]

In [30]:
expected_minusabs(B[0])


Out[30]:
0.25951443147481257

In [31]:
sparsity = [expected_minusabs(b) for b in B]
plt.plot(sparsity)


Out[31]:
[<matplotlib.lines.Line2D at 0x29f24ee1588>]

The trouble with this analysis is that it would likely declare almost anything sparse as long as the corpus contains a lot of silence, which says nothing about the structure of the sounds that are actually present. A better calculation might normalize each dot product by the power of the stretch of signal it covers; a sketch of that idea follows.
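
A minimal sketch of that power-normalized variant (the helper power_normalized_convs below is illustrative, not part of the notebook's codebase): each convolution output sample is divided by the RMS of the signal samples under the corresponding filter-length window, so quiet stretches stop inflating the sparsity measure.

In [ ]:
# Illustrative sketch (hypothetical helper): normalize each convolution sample
# by the RMS of the signal segment the filter overlapped at that lag.
def power_normalized_convs(phi, signal, eps=1e-8):
    flen = phi.shape[1]
    convs = scisig.fftconvolve(phi, signal[None, :], mode='full')
    # local signal energy under a filter-length sliding window; same length as convs
    local_power = scisig.fftconvolve(signal**2, np.ones(flen), mode='full')
    local_rms = np.sqrt(local_power / flen)
    return convs / (local_rms[None, :] + eps)

Applied per signal before concatenating and standardizing, expected_minusabs would then reflect the shape of the responses to actual sound rather than rewarding silence.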


In [5]:
noise = np.random.randn(800)
noise /= np.linalg.norm(noise)

In [6]:
convs = []
for count, signal in enumerate(sigset.data):
    # convolve the unit-norm noise filter with each signal, as a baseline
    convs.append(scisig.fftconvolve(noise, signal, mode='full'))
    if count > 500:
        break
convs = np.concatenate(convs)

In [7]:
convs.shape


Out[7]:
(27945377,)

In [9]:
expected_minusabs(convs)


Out[9]:
0.26606781330850504

In [10]:
plt.figure(figsize=(20,10))
histo, edges = np.histogram(convs, bins=25, range=(-1,1), density=True)
centers = [(edges[kk]+edges[kk+1])/2 for kk in range(len(edges)-1)]
plt.plot(centers, histo, color=(0,0,0.5))
plt.yscale('log')



In [ ]: