In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import lfilter
import scipy.signal as sp
import librosa
import librosa.display
import IPython.display as ipd
Load wave file and play original, unquantized version
In [2]:
wave_filename = '../audio/33711__acclivity__excessiveexposure.wav'
# load file, do resample to 8 KhZ (as used in the DPCM system)
x, sampling_rate = librosa.load(wave_filename, sr=8000)
x_max = max(abs(x))
# playback original audio
ipd.Audio(x, rate=sampling_rate)
Out[2]:
Again, we employ $w=6$ bit per sample and use scalar quantization merely for illustration purposes. We play the original and the reconstructed signal.
First, we calculate the optimal prediction coefficients, where we assume a prediction order of $n=8$
In [122]:
# prediction order
n = 8
# helper function calculating the correlation coefficient as defined in the lecture
def auto_correlation(x, k):
if k >= 0:
x_shift = np.roll(x,k)
x_shift[:k] = 0
else:
x_shift = np.roll(x,k)
x_shift[k:] = 0
return np.correlate(x,x_shift)[0] / len(x)
# helper function for sign that assigns "+1" to zero
def my_sign(x):
if np.isscalar(x):
if np.isclose(x,0):
return +1.0
else:
return np.sign(x)
else:
retval = np.sign(x)
retval[np.isclose(x,np.zeros_like(x))] = +1.0
return retval
# signal auto-correlation
phi_XX = np.array([auto_correlation(x,k) for k in np.arange(1,n+1)])
# inefficient way to calculate R_XX as many terms can be reused due to symmetric of the auto-correlation function
R_XX = np.array([[auto_correlation(x,i-j) for j in range(n)] for i in range(n)])
# optimal prediction coefficients
a_fixed = np.linalg.inv(R_XX) @ phi_XX
In [125]:
# generate residual signal
d = lfilter(np.concatenate(([1], -a_fixed)), 1, x)
# quantize residual signal
w = 6
# fix x_max based on the current signal, leave some room
x_max = np.max([np.max(d), -np.min(d)])
Delta_x = x_max / (2**(w-1))
xh_max = (2**(w-1)) * Delta_x / 2
d_quantized = my_sign(d)*Delta_x*(np.floor(np.abs(d)/Delta_x)+0.5)
In [126]:
# reconstruct signal and playback both original and quantized version
y = lfilter([1], np.concatenate(([1], -a_fixed)), d_quantized)
print('Audio playback of original file')
ipd.display(ipd.Audio(x, rate=sampling_rate))
print('Audio playback of recovered file (after open-loop DPCM)')
ipd.display(ipd.Audio(y, rate=sampling_rate))
Now, we implement the transmitter part with closed-loop DPCM
In [127]:
# memory of the predictor, initialize with zeros, contains the input to the prediction filter, i.e., \tilde{x}
memory = np.zeros(n)
d_quantized_cl = np.zeros_like(x, dtype=float)
for k in range(len(x)):
# use python's built-in dot product operator "@" to compute x_hat
x_hat = a_fixed @ memory
d = x[k] - x_hat
d_quantized_cl[k] = my_sign(d)*Delta_x*(np.floor(np.abs(d)/Delta_x)+0.5)
# saturate
if d_quantized_cl[k] < -xh_max :
d_quantized_cl[k] = -xh_max
if d_quantized_cl[k] > +xh_max :
d_quantized_cl[k] = +xh_max
# update memory of predictor
memory = np.concatenate(([d_quantized_cl[k]+x_hat], memory[:-1]))
Now, the receiver part, reconstruct the signal and play back all three signals.
In [129]:
# reconstruct signal and playback both original and quantized version
y_cl = lfilter([1], np.concatenate(([1], -a_fixed)), d_quantized_cl)
print('Audio playback of original file')
ipd.display(ipd.Audio(x, rate=sampling_rate))
print('Audio playback of recovered file (open-loop DPCM)')
ipd.display(ipd.Audio(y, rate=sampling_rate))
print('Audio playback of recovered file (closed-loop DPCM)')
ipd.display(ipd.Audio(y_cl, rate=sampling_rate))