In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import lfilter
import librosa
import librosa.display
import IPython.display as ipd
Compute frequeny response of the long-term predictor $$ 1-P(\mathrm{e}^{\mathrm{j}\Omega}) = \sqrt{(1-b\cdot\cos(N_0\Omega))^2 + b^2\cdot\sin^2(N_0\Omega)}\cdot\mathrm{e}^{-\mathrm{j}\phi_P(\Omega)} $$ with $$ \phi_P(\Omega) = -\arctan\left(\frac{b\cdot \sin(N_0\Omega)}{1-b\cos(N_0\Omega)}\right) $$
In [4]:
def get_LTP_Frequency_Response(Omega, N0, b):
phi_P = -np.arctan2(b*np.sin(N0*Omega), 1-b*np.cos(N0*Omega))
return np.sqrt(np.square((1-b*np.cos(N0*Omega))) + np.square(b*np.sin(N0*Omega))) *np.exp(-1j * phi_P)
Omega = np.linspace(0,np.pi,10000)
N0 = 100
LTP_Freqb1 = get_LTP_Frequency_Response(Omega, N0, 1)
LTP_Freqb08 = get_LTP_Frequency_Response(Omega, N0, 0.8)
Plot frequency response
In [5]:
font = {'size' : 18}
plt.rc('font', **font)
plt.rc('text', usetex=True)
plt.figure(figsize=(12, 5))
plt.subplot(1,2,1)
plt.plot(Omega, np.abs(LTP_Freqb1), lw=2)
plt.xlim((0,2*np.pi/N0*8))
plt.ylim((0,2.2))
plt.xticks([2*np.pi/N0], labels=[r'$\frac{2\pi}{N_0}$'])
plt.xlabel('$\Omega$')
plt.ylabel(r'$|1-P(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.title('$b=1$')
plt.grid(True)
plt.subplot(1,2,2)
plt.plot(Omega, np.abs(LTP_Freqb08), lw=2)
plt.xlim((0,2*np.pi/N0*8))
plt.ylim((0,2.2))
plt.xticks([2*np.pi/N0], labels=[r'$\frac{2\pi}{N_0}$'])
plt.xlabel('$\Omega$')
plt.ylabel(r'$|1-P(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.title('$b=0.8$')
plt.grid(True)
plt.tight_layout()
plt.savefig('figure_DST_6.15bc.pdf', bbox_inches='tight')
Illustrate long-term prediction on a short speech segment
In [6]:
wave_filename = 'speech_segment.wav'
# load file, do *not* resample
x, sampling_rate = librosa.load(wave_filename, sr=None)
# time in ms
t = np.arange(len(x)) / sampling_rate * 1000.0
In [7]:
# helper function calculating the auto-correlation coefficient r
def get_r(x, l):
x_shift = np.roll(x,l)
x_shift[:l] = 0
return np.correlate(x,x_shift)[0]
def get_s(x, l):
x_shift = np.roll(x,l)
x_shift[:l] = 0
return np.correlate(x_shift,x_shift)[0]
def get_prediction_coefficients(x, n):
r = np.array([get_r(x,k) for k in np.arange(1,n+1)])
R = np.array([np.concatenate(([get_r(x,j) for j in np.arange(i,0,-1)], [get_r(x,j) for j in np.arange(0,n-i)])) for i in range(n)])
a_opt = np.linalg.inv(R) @ r
return a_opt
def get_prediction_frequency_response(a, Omega):
A = np.ones_like(Omega) + 1j*np.zeros_like(Omega)
for k in range(len(a)):
A -= a[k] * np.exp(-1j*(k+1)*Omega)
return A
In [8]:
# short-term prediction
n = 8
a = get_prediction_coefficients(x, n)
xh = lfilter(np.concatenate(([0], a)), 1, x)
d = x - xh
print(len(x))
In [9]:
# find optimal N0 and b
# sweep values
N0_range= np.arange(10,160)
N0_cost = np.zeros_like(N0_range, dtype=float)
for idx,value in enumerate(N0_range):
N0_cost[idx] = get_s(d, 0) - (get_r(d, value)**2)/get_s(d, value)
N0 = N0_range[np.argmin(N0_cost)]
# if we have N0, get b
b = get_r(x, N0)/get_s(x, N0)
In [10]:
# apply long term prediction
dh = lfilter(np.concatenate((np.zeros(N0), [b])), 1, d)
dtilde = d - dh
In [11]:
font = {'size' : 18}
plt.rc('font', **font)
plt.rc('text', usetex=True)
plt.figure(figsize=(12, 10))
plt.subplot(3,1,1)
plt.plot(t, x)
plt.xlim((30,t[-1]))
plt.ylim((-1,1))
plt.xlabel('$k\cdot T$ (ms)')
plt.ylabel('$x[k]$')
plt.subplot(3,1,2)
plt.plot(t, d)
plt.xlim((30,t[-1]))
plt.ylim((-1,1))
plt.xlabel('$k\cdot T$ (ms)')
plt.ylabel('$d[k]$')
plt.subplot(3,1,3)
plt.plot(t, dtilde)
plt.xlim((30,t[-1]))
plt.ylim((-1,1))
plt.xlabel('$k\cdot T$ (ms)')
plt.ylabel(r'$\tilde{d}[k]$')
plt.tight_layout()
plt.savefig('figure_DST_6.16b.pdf', bbox_inches='tight')
Plot spectra
In [15]:
X = np.fft.fft(x, 1024)
G = get_prediction_frequency_response(a, Omega)
D = np.fft.fft(d, 1024)
P = 1-get_LTP_Frequency_Response(Omega, N0, b)
Dt= np.fft.fft(dtilde, 1024)
In [16]:
font = {'size' : 18}
plt.rc('font', **font)
plt.rc('text', usetex=True)
plt.figure(figsize=(8, 14))
plt.subplot(5,1,1)
plt.plot(np.linspace(0,sampling_rate/2,len(X)//2),np.abs(X[:(len(X)//2)]), color='xkcd:orange', lw=2)
plt.xlim((0,sampling_rate/2))
plt.ylim((0,65))
plt.xlabel('$f$ (kHz)')
plt.ylabel('$|X(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.subplot(5,1,2)
plt.plot(np.linspace(0,sampling_rate/2,len(Omega)),np.abs(G), color='xkcd:blue', lw=2)
plt.xlim((0,sampling_rate/2))
plt.ylim((0,8))
plt.xlabel('$f$ (kHz)')
plt.ylabel('$|1-A(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.subplot(5,1,3)
plt.plot(np.linspace(0,sampling_rate/2,len(D)//2),np.abs(D[:(len(X)//2)]), color='xkcd:orange', lw=2)
plt.xlim((0,sampling_rate/2))
plt.ylim((0,60))
plt.xlabel('$f$ (kHz)')
plt.ylabel('$|D(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.subplot(5,1,4)
plt.plot(np.linspace(0,sampling_rate/2,len(Omega)),np.abs(1-P), color='xkcd:teal', lw=2)
plt.xlim((0,sampling_rate/2))
plt.ylim((0,2))
plt.xlabel('$f$ (kHz)')
plt.ylabel('$|D(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.subplot(5,1,5)
plt.plot(np.linspace(0,sampling_rate/2,len(Dt)//2),np.abs(Dt[:(len(X)//2)]), color='xkcd:orange', lw=2)
plt.xlim((0,sampling_rate/2))
plt.ylim((0,60))
plt.xlabel('$f$ (kHz)')
plt.ylabel(r'$|\tilde{D}(\mathrm{e}^{\mathrm{j}\Omega})|$')
plt.tight_layout()
plt.savefig('figure_DST_6.17_part.pdf', bbox_inches='tight')
In [ ]: