SNR Benefits of Adaptive Scalar Quantization

This code is provided as supplementary material of the lecture Quellencodierung.

This code illustrates

Uniform scalar quantization of audio files with midrise and midtread characteristic
Adaptive uniform quantization with forward estimation (AQF)
Adaptive uniform quantization with backward estimation (AQF)
Illustration of the segmental SNR benefits



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd

Load and display wave file



In [84]:

    
wave_filename = '../audio/33711__acclivity__excessiveexposure.wav'

x, sampling_rate = librosa.load(wave_filename)
print('Sampling rate: %d' % sampling_rate)









    



Sampling rate: 22050

Midrise quantization



In [76]:

    
# Sample to 5 bit ... 16 quantization levels
w = 5

# fix x_max based on the current signal, leave some room
x_max = np.max([np.max(x), -np.min(x)])
Delta_x = x_max / (2**(w-1))

xh_uniform_midrise = np.sign(x)*Delta_x*(np.floor(np.abs(x)/Delta_x)+0.5)

Compute segmental SNR (with $n_\mathsf{s} = 256$) $$ \textrm{segSNR}[k]\bigg|_{\mathrm{dB}} = 10\log_{10}\left(\frac{\sum_{i=1}^{n_s}(x[(k-1)n_{\mathsf{s}}+i])^2}{\sum_{i=1}^{n_s}(x[(k-1)n_{\mathsf{s}}+i]-\hat{x}[(k-1)n_{\mathsf{s}}+i])^2}\right) $$



In [77]:

    
# Compute SNR
noise = xh_uniform_midrise - x

# number of samples used for segmentation
seg_len = 256
segments = int(np.floor(len(x)/seg_len))

x_seg = np.reshape(x[0:segments*seg_len],(segments,seg_len))
xh_seg = np.reshape(xh_uniform_midrise[0:segments*seg_len],(segments,seg_len))
snr_uniform_midrise_seg = 10*np.log10(np.mean(np.square(x_seg),axis=1) / np.mean(np.square(xh_seg - x_seg),axis=1))

Uniform Quantization with Midtread Quantizer



In [78]:

    
# fix x_max based on the current signal, leave some room
x_max = np.max(x)
Delta_x = x_max / (2**(w-1))


xh_uniform_midtread = np.sign(x)*Delta_x*np.floor(np.abs(x)/Delta_x+0.5)

# saturate
xh_max = (2**(w-1)*Delta_x - Delta_x)
xh_min = -(xh_max + Delta_x)
xh_uniform_midtread[xh_uniform_midtread >= xh_max] = xh_max
xh_uniform_midtread[xh_uniform_midtread <= xh_min] = xh_min



In [79]:

    
# Compute SNR
noise = xh_uniform_midtread - x

x_seg = np.reshape(x[0:segments*seg_len],(segments,seg_len))
xh_seg = np.reshape(xh_uniform_midtread[0:segments*seg_len],(segments,seg_len))
snr_uniform_midtread_seg = 10*np.log10(np.mean(np.square(x_seg),axis=1) / np.mean(np.square(xh_seg - x_seg),axis=1))

Adaptive Quantization with Forward Estimation (AQF)

For each block of length $n_{\mathsf{a}}$, estimate the instantenous power $$ \hat{\sigma}_x^2[k] = \frac{1}{n_{\mathsf{a}}}\sum_{i=0}^{n_{\mathsf{a}}-1}\left(x\left[\left\lceil{k}/{n_{\mathsf{a}}}\right\rceil+i\right]\right)^2 $$ and based on this, select the stepsize $$ \Delta[k] = c\cdot \hat{\sigma}_x[k] $$



In [118]:

    
def AQF(x,c):
    # segment length
    n_a = 256
    segments = int(np.floor(len(x)/n_a))

    x_seg = np.reshape(x[0:segments*n_a],(segments,n_a))

    sigmaq_x = np.mean(np.square(x_seg),axis=1)
    
    xh_seg = np.zeros_like(x_seg)
    
    Delta_x = np.zeros(segments)
    Delta_large = np.zeros_like(x_seg)
    for k in range(segments):
        # quantize each segment, use standard deviation
        Delta_x[k] = c * np.sqrt(sigmaq_x[k])
        
        # quantize
        temp = np.sign(x_seg[k,:])*Delta_x[k]*(np.floor(np.abs(x_seg[k,:])/Delta_x[k])+0.5)  
        
        # saturation & clipping
        xh_max = (2**w - 1)*Delta_x[k]/2
        xh_min = -xh_max
        temp[temp >= xh_max] = xh_max
        temp[temp <= xh_min] = xh_min

        xh_seg[k,:] = temp.copy()
        Delta_large[k,:] = Delta_x[k]
    # undo shaping
    xh = np.reshape(xh_seg, segments*n_a)
    Delta_return = np.reshape(Delta_large, segments*n_a)
    return xh, Delta_return

For the given signal, try multiple values of $c$ and select the one which maximizes the SNR



In [121]:

    
# find optimal c
c_range = np.linspace(0.001,1,1000)
SNRs = np.zeros(len(c_range))

for i,c in enumerate(c_range):
    xh,_ = AQF(x,c)
    SNRs[i] = 10*np.log10(np.mean(np.square(x))) - 10*np.log10(np.mean(np.square(x[0:len(xh)]-xh)))

max_ci = np.argmax(SNRs)
print('Value of c maximizing the SNR: c = %1.2f' % c_range[max_ci])









    



Value of c maximizing the SNR: c = 0.16



In [124]:

    
font = {'size'   : 20}
plt.rc('font', **font)
plt.rc('text', usetex=True)

plt.figure(figsize=(10,6))
plt.plot(c_range, SNRs,linewidth=2)
plt.xlim((0,1))
plt.ylim((0,30))
plt.grid()
plt.xlabel('$c$')
plt.ylabel('SNR')
plt.savefig('AQF_optimalc_w%d.pdf' % w, bbox_inches='tight')

To find the optimal $c$, we employ the Golden-section search. This algorithm finds the maximum over a unimodal function $f(x)$ (i.e., a function with a single maximum) over an interval $[x_1, x_4]$. Let $\varphi = \frac{1+\sqrt{5}}{2} \approx 1.618$ denote the golden ratio.

The algorithm works as follows:

Let \begin{align*} x_2 &= x_4 - \frac{x_4-x_1}{\varphi}\\ x_3 &= x_1 + \frac{x_4-x_1}{\varphi} \end{align*}
If $f(x_2) > f(x_3)$, then $x_4 \leftarrow x_3$, otherwise $x_1 \leftarrow x_2$. Then recompute \begin{align*} x_2 &= x_4 - \frac{x_4-x_1}{\varphi}\\ x_3 &= x_1 + \frac{x_4-x_1}{\varphi} \end{align*}
If $|x_2-x_3| > \epsilon$, repeat step 2.

The final value $x_{\text{opt}}$ is obtained as $\frac{1}{2}(x_1+x_4)$.



In [125]:

    
c1 = 0.1
c4 = 50
phi = (1+np.sqrt(5))/2

c2 = c4 - (c4-c1)/phi
c3 = c1 + (c4-c1)/phi

while np.abs(c4-c1) > 1e-5:
    xh,_ = AQF(x,c2)
    SNR2 = 10*np.log10(np.mean(np.square(x))) - 10*np.log10(np.mean(np.square(x[0:len(xh)]-xh)))
    
    xh,_ = AQF(x,c3)
    SNR3 = 10*np.log10(np.mean(np.square(x))) - 10*np.log10(np.mean(np.square(x[0:len(xh)]-xh)))    
    
    if SNR2 > SNR3:
        c4 = c3
    else:
        c1 = c2
    
    c2 = c4 - (c4-c1)/phi
    c3 = c1 + (c4-c1)/phi

# compute final optimizer
c_opt = (c1+c4)/2
print(c_opt)









    



0.16025229008951422

Carry out quantization for optimal $c$



In [126]:

    
# fix x_max based on the current signal, leave some room
xh_AQF, Delta_AQF = AQF(x,c_opt)



In [127]:

    
# Compute segmental SNR
x_seg = np.reshape(x[0:segments*seg_len],(segments,seg_len))
xh_seg = np.reshape(xh_AQF[0:segments*seg_len],(segments,seg_len))
snr_AQF_seg = 10*np.log10(np.mean(np.square(x_seg),axis=1) / np.mean(np.square(xh_seg - x_seg),axis=1))



In [128]:

    
# plot segmental snr
font = {'size'   : 13}
plt.rc('font', **font)
plt.rc('text', usetex=False)

plt.figure(figsize=(14, 8.5))
plt.subplot(2,1,1)
plt.plot(np.arange(segments), snr_uniform_midrise_seg, linewidth=2)
plt.plot(np.arange(segments), snr_uniform_midtread_seg, linewidth=2)
plt.plot(np.arange(segments), snr_AQF_seg, linewidth=2)
plt.xlabel('k / %d' % seg_len)
plt.ylabel('segmental SNR (dB)')
plt.ylim((-30,30))
plt.grid()

x_start = 25600 // seg_len
x_stop = 72960 // seg_len

plt.xlim((x_start,x_stop))
plt.legend(['Midrise ($w=5$)','Midtread ($w=5$)','AQF ($w=5$)'])

plt.subplot(2,1,2)
librosa.display.waveplot(x[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)], sr=sampling_rate)
plt.tight_layout()

plt.savefig('AQF_segmentalSNR_w%d.pdf' % w, bbox_inches='tight')



In [129]:

    
# plot stepsize
font = {'size'   : 13}
plt.rc('font', **font)
plt.rc('text', usetex=False)

plt.figure(figsize=(14, 8.5))

plt.subplot(2,1,1)
plt.plot(Delta_AQF[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)])
plt.xlim((0,(x_stop-x_start)*seg_len))
plt.xlabel('Sample $k$')
plt.ylabel('$\Delta[k]$')

plt.subplot(2,1,2)
librosa.display.waveplot(x[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)], sr=sampling_rate)
plt.tight_layout()

plt.savefig('AQF_segmentalDelta_w%d.pdf' % w, bbox_inches='tight')

Adaptive Quantization with Backward Estimation (AQB)

The AQB algorithm works on recursive computation of new stepsizes $\Delta[k] = M[k]\cdot \Delta[k-1]$ Quantization of $x[k]$ happens according to $$ \hat{x}[k] = \mathrm{sign}(x[k])\cdot Z[k]\cdot \frac{\Delta[k]}{2} $$ with $$ Z[k] = 2 \left\lfloor\frac{x[k]}{\Delta[k]}\right\rfloor+1 $$ The new new stepsize $M[k+1]$ is usually obtained as $$ M[k+1] = \sqrt{\alpha + (1-\alpha)Z^2[k]\frac{c^2}{4}} $$ where $\alpha$ and $c$ are parameters that need to be optimized. Here we use the tables from [Jay73]

[Jay73] N. S. Jayant, "Adaptive Quantization With a One-Word Memory," Bell System Technical Journal , vol. 52, no. 7, pp. 1119--1144, 1973



In [185]:

    
M_dict = {2: [0.6, 2.2],
          3: [0.85, 1.0, 1.0, 1.5],
          4: [0.8, 0.8, 0.8, 0.8, 1.2, 1.6, 2.0, 2.4],
          5: [0.85, 0.85, 0.85, 0.85, 0.85, 0.85, 0.85, 0.85, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6]}

def AQB(x, w):
    if w in M_dict:
        Ms = M_dict[w]
        
        q_max = 2**(w-1)-1
        
        # initial stepsize 0.1, needs to be known at transmitter and receiver
        # initial multiplier 1
        Delta = 0.1
        M = 1
        
        xh = np.sign(x) # precompute signs
        Deltas = np.zeros_like(x)
        
        for k in range(len(x)):
            # compute new stepsize
            Delta = Delta * M            
            Deltas[k] = Delta
            
            # quantize, this is the operation done at the transmitter, q is being transmitter
            q = int(np.floor(np.abs(x[k])/Delta))            
            # saturate
            if q < 0:
                q = 0
            if q > q_max:
                q = q_max
            
            # reconstruct sample, this is the receiver operation
            xh[k] *= (2*q+1)*Delta/2
            
            # compute new multiplier
            M = Ms[q]
        
        return xh, Deltas
    else:
        print('AQB not defined for word length w = %d' % w)
        return [],[]

xh_AQB,Delta_AQB = AQB(x,5)



In [177]:

    
# Compute segmental SNR
x_seg = np.reshape(x[0:segments*seg_len],(segments,seg_len))
xh_seg = np.reshape(xh_AQB[0:segments*seg_len],(segments,seg_len))
snr_AQB_seg = 10*np.log10(np.mean(np.square(x_seg),axis=1) / np.mean(np.square(xh_seg - x_seg),axis=1))



In [178]:

    
# plot segmental snr
font = {'size'   : 13}
plt.rc('font', **font)
plt.rc('text', usetex=False)

plt.figure(figsize=(14, 8.5))
plt.subplot(2,1,1)
plt.plot(np.arange(segments), snr_uniform_midrise_seg, linewidth=2)
plt.plot(np.arange(segments), snr_uniform_midtread_seg, linewidth=2)
plt.plot(np.arange(segments), snr_AQF_seg, linewidth=2)
plt.plot(np.arange(segments), snr_AQB_seg, linewidth=2)
plt.xlabel('k / %d' % seg_len)
plt.ylabel('segmental SNR (dB)')
plt.ylim((-30,30))
plt.grid()

x_start = 25600 // seg_len
x_stop = 72960 // seg_len

plt.xlim((x_start,x_stop))
plt.legend(['Midrise ($w=5$)','Midtread ($w=5$)','AQF ($w=5$)', 'AQB ($w=5$)'])

plt.subplot(2,1,2)
librosa.display.waveplot(x[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)], sr=sampling_rate)
plt.tight_layout()

plt.savefig('AQB_segmentalSNR_w%d.pdf' % w, bbox_inches='tight')



In [184]:

    
# plot stepsize of speech segement ("we will not")
font = {'size'   : 13}
plt.rc('font', **font)
plt.rc('text', usetex=False)

plt.figure(figsize=(14, 8.5))

x_start = 25600 // seg_len
x_stop = 42960 // seg_len

plt.subplot(2,1,1)
plt.plot(Delta_AQB[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)])
plt.xlim((0,(x_stop-x_start)*seg_len))
plt.xlabel('Sample $k$')
plt.ylabel('$\Delta[k]$')

plt.subplot(2,1,2)
librosa.display.waveplot(x[(x_start*seg_len+seg_len//2):(x_stop*seg_len+seg_len//2)], sr=sampling_rate)
plt.tight_layout()

plt.savefig('AQB_segmentalDelta_w%d.pdf' % w, bbox_inches='tight')



In [ ]: