Prediction Gain of Linear Prediction

This code is provided as supplementary material of the lecture Quellencodierung.

This code illustrates

Calculation of the prediction gain
Playback of the prediction error signal
Difference between blockwise adaptation and constant predictor coefficients



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import lfilter
import librosa
import librosa.display
import IPython.display as ipd

Consider two different wave files



In [2]:

    
wave_filename = '../audio/33711__acclivity__excessiveexposure.wav'

First case: Fixed prediction coefficients

Calculate the prediction gain as $$ G_{\mathsf{p}} = \frac{\sigma_x^2}{\sigma_x^2-\boldsymbol{\varphi}_{XX}^{\mathsf{T}}\boldsymbol{R}_{XX}^{-1}\boldsymbol{\varphi}_{XX}} $$ where $$ \boldsymbol{\varphi}_{XX} := \begin{pmatrix} \varphi_{XX}[1] \\ \varphi_{XX}[2] \\ \vdots\\ \varphi_{XX}[n] \end{pmatrix} $$ and $$ \boldsymbol{R}_{XX} := \begin{pmatrix} \varphi_{XX}[0] & \varphi_{XX}[-1] & \cdots & \varphi_{XX}[1-n] \\ \varphi_{XX}[1] & \varphi_{XX}[0] & \cdots & \varphi_{XX}[2-n] \\ \vdots & \vdots & \ddots & \vdots\\ \varphi_{XX}[n-1] & \varphi_{XX}[n-2] & \cdots & \varphi_{XX}[0] \end{pmatrix} $$ with $$ \varphi_{XX}[\lambda] = \mathbb{E}\left\{X[k]X[k-\lambda]\right\} $$ being the auto-correlation of $X$.



In [3]:

    
# helper function calculating the correlation coefficient as defined in the lecture
def auto_correlation(x, k):
    if k >= 0:
        x_shift = np.roll(x,k)
        x_shift[:k] = 0
    else:
        x_shift = np.roll(x,k)
        x_shift[k:] = 0
        
    return np.correlate(x,x_shift)[0] / len(x)
    
# calculate prediction gain
def get_prediction_gain(x, n):
    if n == 0:
        return 1
    
    phi_XX = np.array([auto_correlation(x,k) for k in np.arange(1,n+1)])
    
    # inefficient way to calculate R_XX as many terms can be reused due to symmetric of the auto-correlation function
    R_XX = np.array([[auto_correlation(x,i-j) for j in range(n)] for i in range(n)])
    
    sigmaq_x = np.mean(np.square(x))    
    
    sigmaq_d = sigmaq_x - phi_XX.T @ np.linalg.inv(R_XX) @ phi_XX
    
    Gp = sigmaq_x / sigmaq_d
    return Gp



In [4]:

    
# compute prediction gains for two different audio files
x, sampling_rate = librosa.load(wave_filename, sr=8000)
n_range = range(31)
Gp = [get_prediction_gain(x,n) for n in n_range]

Playback of prediction error signal



In [5]:

    
# select a prediction filter of 10 coefficients
n = 10

phi_XX = np.array([auto_correlation(x,k) for k in np.arange(1,n+1)])
    
# inefficient way to calculate R_XX as many terms can be reused due to symmetric of the auto-correlation function
R_XX = np.array([[auto_correlation(x,i-j) for j in range(n)] for i in range(n)])

# optimal prediction coefficients
aopt = np.linalg.inv(R_XX) @ phi_XX

# generate prediction signal
xh = lfilter(np.concatenate(([0], aopt)), 1, x)

# prediction error
d = x - xh

Playback original signal



In [6]:

    
# playback original audio
ipd.Audio(x, rate=sampling_rate)









    Out[6]:

Playback predicted signal



In [7]:

    
# playback original audio
ipd.Audio(xh, rate=sampling_rate)









    Out[7]:

Playback prediction error



In [8]:

    
# playback original audio
ipd.Audio(d, rate=sampling_rate)









    Out[8]:

Plot waveforms



In [9]:

    
x_max = np.max([np.max(x), -np.min(x)])

plt.figure(figsize=(10, 11))
plt.subplot(3,1,1)
librosa.display.waveplot(x, sr=sampling_rate)
plt.title('Original')
plt.ylim((-x_max*1.2,+x_max*1.2))
plt.subplot(3,1,2)
librosa.display.waveplot(xh, sr=sampling_rate)
plt.title('Predicted signal')
plt.ylim((-x_max*1.2,+x_max*1.2))
plt.subplot(3,1,3)
librosa.display.waveplot(d, sr=sampling_rate)
plt.title('Prediction error')
plt.ylim((-x_max*2.2,+x_max*2.2))
plt.tight_layout()

Second case: Block adaptation of predictor coefficients

We extract blocks of the signal using a rectangular window $w[k]$ of length $N$ $$ \tilde{x}[k] = x[k]\cdot w[k_0-k] $$
The signal $\tilde{x}[k]$ attains non-zero values only within the time interval $$ k_1 = k_0-N+1 \leq k \leq k_0 $$ The short-term auto-correlation for finite-energy signals is defined as ($\forall\lambda \in\{0,1,\ldots, n\}$) $$ r_\lambda = \sum_{k=k_1}^{k_0}\tilde{x}[k]\tilde{x}[k+\lambda] = \sum_{k=k_1}^{k_0-\lambda}x[k]x[k+\lambda] $$ Due to symmetry ($r_{-\lambda} = r_\lambda$), the normal equations can be rewritten as $$ \begin{pmatrix} r_1 \\ r_2 \\ r_2 \\ \vdots \\ r_n \end{pmatrix} = \begin{pmatrix} r_0 & r_1 & r_2 & \cdots & r_{n-1} \\ r_1 & r_2 & r_3 & \cdots & r_{n-2} \\ r_2 & r_3 & r_4 & \cdots & r_{n-3} \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ r_{n-1} & r_{n-2} & r_{n-3} & \cdots & r_{0} \\ \end{pmatrix}\begin{pmatrix} a_1 \\ a_2 \\ a_3 \\ \vdots \\ a_n \end{pmatrix} $$ The solution of this system yields optimal predictor coefficients $\boldsymbol{a}_{\text{opt}}$ for this block.



In [15]:

    
# helper function calculating the auto-correlation coefficient r
def get_r(x, l):    
    x_shift = np.roll(x,l)
    x_shift[:l] = 0        
    return np.correlate(x,x_shift)[0]
    
    
def get_prediction_coefficients(x, n):
    r = np.array([get_r(x,k) for k in np.arange(1,n+1)])
    R = np.array([np.concatenate(([get_r(x,j) for j in np.arange(i,0,-1)], [get_r(x,j) for j in np.arange(0,n-i)])) for i in range(n)])    
    
    a_opt = np.linalg.inv(R) @ r
    return a_opt



In [23]:

    
# block prediction with N samples per block and predictor order n
def block_prediction(x, n, N):
    if n==0:
        return x
    
    frames = int(np.floor(len(x) / N))

    # prediction error
    d = np.zeros_like(x)

    # no adaptation for first frame
    a = np.zeros(n)

    # filter memory
    memory = np.zeros(n)

    for k0 in range(frames):
        x_part = x[np.arange(N)+k0*N]

        # compute prediction coefficients on current block
        a = get_prediction_coefficients(x_part, n)               
        
        xh_part, memory = lfilter(np.concatenate(([0], a)), 1, x_part, zi=memory)
        d[np.arange(len(xh_part)) + k0*N] = x_part - xh_part
        

    return d



In [24]:

    
# compute prediction gains for two different audio files, 20 ms blocks
N = int(sampling_rate * 0.02)

Gp_block = []
for n in n_range:
    d = block_prediction(x, n, N)        
    Gp_block.append(np.mean(np.square(x)) / np.mean(np.square(d)))



In [31]:

    
plt.figure(figsize=(8, 5))

plt.plot(n_range, 10*np.log10(Gp), ms=5, marker='o')

plt.plot(n_range, 10*np.log10(Gp_block), '--',ms=5, marker='o')

    
plt.xlim((0,30))
plt.ylim((0,15))
plt.xlabel('$n$')
plt.grid(True)
plt.ylabel('$10\log_{10}(G_{\mathsf{p}})$')
plt.legend(['Fixed predictor coefficients', 'Block adaptation ($N=%d$)' % N], loc=4)         
plt.savefig('figure_DST_6.11.pdf',bbox_inches='tight')

Generate prediction error and predicted signal



In [32]:

    
n = 30
d = block_prediction(x, n, N)    
xh = x-d

Playback original signal



In [33]:

    
ipd.Audio(x, rate=sampling_rate)









    Out[33]:

Playback predicted signal



In [35]:

    
ipd.Audio(xh, rate=sampling_rate)









    Out[35]:

Playback prediction error



In [36]:

    
ipd.Audio(d, rate=sampling_rate)









    Out[36]:

Plot waveforms



In [37]:

    
x_max = np.max([np.max(x), -np.min(x)])

plt.figure(figsize=(10, 11))
plt.subplot(3,1,1)
librosa.display.waveplot(x, sr=sampling_rate)
plt.title('Original')
plt.ylim((-x_max*1.2,+x_max*1.2))
plt.subplot(3,1,2)
librosa.display.waveplot(xh, sr=sampling_rate)
plt.title('Predicted signal')
plt.ylim((-x_max*1.2,+x_max*1.2))
plt.subplot(3,1,3)
librosa.display.waveplot(d, sr=sampling_rate)
plt.title('Prediction error')
plt.ylim((-x_max*2.2,+x_max*2.2))
plt.tight_layout()



In [ ]: