Use of GMMs to Model a Zero-Dispersion Optical Channel

This code is provided as supplementary material for the lecture Machine Learning and Optimization in Communications (MLOC).

This code illustrates

  • Using Gaussian Mixture Models (GMMs) to approximate the PDF of the channel output and the conditional PDFs of a (complex) transmission channel
  • Using the approximated channel transition PDF to compute the achievable information rate (AIR) of the channel

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interactive
import ipywidgets as widgets
from sklearn import mixture
%matplotlib inline

We specify the parameters of the transmission: the fiber length $L$ (in km), the fiber nonlinearity coefficient $\gamma$ (given in 1/W/km), and the total noise power $P_n$ (given in dBm; the noise is due to amplified spontaneous emission in the amplifiers along the link). We assume a model of a dispersion-less fiber affected by nonlinearity. The model, which is described, for instance, in [1], is given by an iterative application of the equation $$ \tilde{t}_{k+1} = \tilde{t}_k\exp\left(\jmath\frac{L}{K}\gamma|\tilde{t}_k|^2\right) + n_{k+1},\qquad 0 \leq k < K $$ where $\tilde{t}_0=t$ is the channel input (the modulated, complex symbols) and $r=\tilde{t}_K$ is the channel output. $K$ denotes the number of steps used to simulate the channel; usually $K=50$ gives a good approximation.

Here, we specify a 16-QAM constellation (and, for comparison, a 16-APSK constellation) to communicate over the channel.

[1] S. Li, C. Häger, N. Garcia, and H. Wymeersch, "Achievable Information Rates for Nonlinear Fiber Communication via End-to-end Autoencoder Learning," Proc. European Conference on Optical Communications (ECOC), Rome, Sep. 2018
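As a small numeric check (a sketch only, using the noise power mentioned below and $K=50$), a power of $P$ dBm corresponds to $10^{(P-30)/10}$ W; the per-step noise standard deviation used in the simulation then follows by splitting the total noise power evenly over the $K$ steps and the two quadratures.

In [ ]:
# small numeric check of the dBm-to-watt conversion used in the channel model
Pn_dBm = -21.3                       # total noise power in dBm
Pn_lin = 10**((Pn_dBm - 30)/10)      # total noise power in W (about 7.4e-6 W)
K = 50                               # number of simulation steps
sigma = np.sqrt(Pn_lin / K / 2)      # per-step, per-quadrature noise standard deviation
print(Pn_lin, sigma)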


In [2]:
# Length of transmission (in km)
L = 5000

# fiber nonlinearity coefficient (in 1/W/km)
gamma = 1.27

Pn = -21.3 # noise power (in dBm)

Kstep = 10 # number of steps used in the channel model


constellations = {'16-QAM': np.array([-3,-3,-3,-3,-1,-1,-1,-1,1,1,1,1,3,3,3,3]) + 1j*np.array([-3,-1,1,3,-3,-1,1,3,-3,-1,1,3,-3,-1,1,3]), \
                  '16-APSK': np.array([1,-1,0,0,1.4,1.4,-1.4,-1.4,3,-3,0,0,5,-5,0,0]) + 1j*np.array([0,0,1,-1,1.4,-1.4,1.4,-1.4,0,0,4,-4,0,0,6,-6])}

def simulate_channel(x, Pin, constellation):
    # map the symbol indices in x to constellation points
    assert all(x >= 0)
    assert all(x < len(constellation))
    # normalize the constellation to the desired average input power Pin (given in dBm)
    input_power_linear = 10**((Pin-30)/10)
    norm_factor = 1 / np.sqrt(np.mean(np.abs(constellation)**2)/input_power_linear)
    modulated = constellation[x] * norm_factor

    # noise standard deviation per step and per quadrature (real/imaginary part)
    sigma = np.sqrt((10**((Pn-30)/10)) / Kstep / 2)

    # channel model: iteratively apply the nonlinear phase rotation and add noise
    temp = np.array(modulated, copy=True)
    for i in range(Kstep):
        power = np.absolute(temp)**2
        rotcoff = (L / Kstep) * gamma * power
        temp = temp * np.exp(1j*rotcoff) + sigma*(np.random.randn(len(x)) + 1j*np.random.randn(len(x)))
    return temp

We consider 16-QAM transmission over this channel.

We now show the received constellation as a function of the fiber input power. When the input power is small, the effect of the nonlinearity is small (as $\jmath\frac{L}{K}\gamma|\tilde{t}_k|^2 \approx 0$) and the transmission is dominated by the additive noise. As the input power becomes larger, the effect of the noise (whose power is constant) becomes less pronounced, but the constellation rotates more and more due to the larger input power and hence the stronger effect of the nonlinearity.
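As a rough numeric check (a sketch using the parameters $L$ and $\gamma$ defined above and ignoring the noise), the accumulated nonlinear phase rotation of a symbol transmitted at the average input power is $\gamma L P_{\mathrm{in}}$, which grows quickly with the input power:

In [ ]:
# accumulated nonlinear phase rotation gamma*L*P (in rad) of a symbol whose power
# equals the average input power, ignoring the noise
for Pin_dBm in (-10, 0, 10):
    P_lin = 10**((Pin_dBm - 30)/10)   # average input power in W
    print('Pin = %5.1f dBm -> nonlinear phase rotation = %6.2f rad' % (Pin_dBm, gamma*L*P_lin))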


In [3]:
length_plot = 4000

def plot_constellation(Pin, constellation_name):
    constellation = constellations[constellation_name]
        
    t = np.random.randint(len(constellation),size=length_plot)
    r = simulate_channel(t, Pin, constellation)

    plt.figure(figsize=(12,6))
    font = {'size'   : 14}
    plt.rc('font', **font)
    plt.rc('text', usetex=True)
    plt.subplot(1,2,1)
    r_tx = constellation
    plt.scatter(np.real(r_tx), np.imag(r_tx), c=range(len(constellation)), marker='o', s=200, cmap='tab20')
    plt.xticks(())
    plt.yticks(())
    plt.axis('equal')
    plt.xlabel(r'$\Re\{r\}$',fontsize=14)
    plt.ylabel(r'$\Im\{r\}$',fontsize=14)
    plt.title('Transmitted constellation')
    
    plt.subplot(1,2,2)
    plt.scatter(np.real(r), np.imag(r), c=t, cmap='tab20',s=4)
    plt.xlabel(r'$\Re\{r\}$',fontsize=14)
    plt.ylabel(r'$\Im\{r\}$',fontsize=14)
    plt.axis('equal')
    plt.title('Received constellation ($L = %d$\,km, $P_{in} = %1.2f$\,dBm)' % (L, Pin))    
    #plt.savefig('%s_received_zd_%1.2f.pdf' % (constellation_name.replace('-','_'),Pin),bbox_inches='tight')
    
interactive_update = interactive(plot_constellation, \
                                 Pin = widgets.FloatSlider(min=-10.0,max=10.0,step=0.1,value=1, continuous_update=False, description='Input Power Pin (dBm)', style={'description_width': 'initial'}, layout=widgets.Layout(width='50%')), \
                                 constellation_name = widgets.RadioButtons(options=['16-QAM','16-APSK'], value='16-QAM',continuous_update=False,description='Constellation'))


output = interactive_update.children[-1]
output.layout.height = '400px'
interactive_update



In [48]:
constellation = constellations['16-QAM']

# here, use m = 8*16 = 128 mixture components for the GMM of the received samples
m = 8*len(constellation)

Pin = -4
length_train = 16000

t = np.random.randint(len(constellation),size=length_train)
r = simulate_channel(t, Pin, constellation)

# generate training set
X_train = np.column_stack( (np.real(r), np.imag(r)) )
    

# use the sklearn toolbox to fit the GMM
clf = mixture.GaussianMixture(n_components=m, covariance_type='full')
clf.fit(X_train)


# plot
# display the PDF estimated by the GMM as a contour plot
x = np.linspace(1.1*min(np.real(r)), 1.1*max(np.real(r)), 200)
y = np.linspace(1.1*min(np.imag(r)), 1.1*max(np.imag(r)), 200)
X, Y = np.meshgrid(x, y)
XX = np.array([X.ravel(), Y.ravel()]).T
Z = np.exp(clf.score_samples(XX))
Z = Z.reshape(X.shape)

plt.figure(figsize=(12,5.5))
plt.subplot(121)
plt.scatter(np.real(r), np.imag(r), c=t, cmap='tab20',s=1)
plt.xlim((x[0], x[-1]))
plt.ylim((y[0], y[-1]))
plt.axis('scaled')
plt.xlabel(r'$\Re\{r\}$',fontsize=14)
plt.ylabel(r'$\Im\{r\}$',fontsize=14)
plt.title('Received constellation ($L = %d$\,km, $P_{in} = %1.1f$\,dBm)' % (L,Pin))

plt.subplot(122)
CS = plt.contourf(X, Y, Z, levels=30, cmap='viridis')
#for c in CS.collections:
#    c.set_edgecolor("face")
#    c.set_linewidth(0.000000000001)
    
plt.xlim((x[0], x[-1]))
plt.ylim((y[0], y[-1]))
plt.axis('scaled')
plt.xlabel(r'$\Re\{r\}$',fontsize=14)
plt.ylabel(r'$\Im\{r\}$',fontsize=14)
plt.title('$p(r)$ using GMM')
#plt.savefig('16_QAM_received_GMM.pdf',bbox_inches='tight')


Now we train a GMM for each constellation point individually, i.e., we approximate the conditional PDFs $p(r|\mathsf{t}=s_k)$. We will use these later to compute the mutual information (and hence the performance of the transmission scheme).


In [65]:
def train_GMMs(t,r,m, constellation):
    symbols = range(len(constellation))
    retval = [mixture.GaussianMixture(n_components=m, covariance_type='full') for k in symbols]
    for k in symbols:
        idx = t == k
        X_train = np.column_stack( (np.real(r[idx]), np.imag(r[idx])) )
        retval[k].fit(X_train)
    return retval

symbol_gmms = train_GMMs(t, r, 10, constellations['16-QAM'])
plt.figure(figsize=(14,7))
plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.05)
for k in range(len(constellation)):
    Z = np.exp(symbol_gmms[k].score_samples(XX))
    Z = Z.reshape(X.shape)
    plt.subplot(3,6,k+1)
    CS = plt.contourf(X, Y, Z, levels=30, cmap='viridis')
    #for c in CS.collections:
    #    c.set_edgecolor("face")
    #    c.set_linewidth(0.000000000001)
    plt.xlim((x[0], x[-1]))
    plt.ylim((y[0], y[-1]))
    plt.xticks(())
    plt.yticks(())
    plt.text(.99, .01, '$p(\\textsf{r}|\\textsf{t}=s_{%d})$' % (k+1), transform=plt.gca().transAxes, size=15, horizontalalignment='right', color='w')
#plt.savefig('16_QAM_conditional_PDF_GMM_m10.pdf' ,bbox_inches='tight')



Computation of Mutual Information Using GMMs

Next, we would like to compute the mutual information $I(\mathsf{t};\mathsf{r})$ between channel input and channel output. Remember that the mutual information is defined as \begin{align*} I(\textsf{t};\textsf{r}) &= \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{p(r,\mathsf{t}=s_i)}{p(r)P(\mathsf{t}=s_i)}\right)\\ &= \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{p(r|\mathsf{t}=s_i)}{p(r)}\right) \\ &= \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{p(r|\mathsf{t}=s_i)}{\sum_{j=1}^{16}p(r|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right) \end{align*} In principle, we could use $p(\mathsf{r}|\mathsf{t})$, which we approximated by a GMM, and then numerically integrate over the complex plane $\mathbb{C}$; however, this numerical integration is costly and requires many evaluations of the Gaussian mixture PDF if high accuracy is to be obtained.
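To illustrate this, the following is a minimal sketch of such a grid-based integration (assuming the per-symbol GMMs symbol_gmms trained above and uniform priors). Note that it approximates the mutual information of the GMM-approximated channel on a truncated grid rather than bounding that of the true channel, which is one more reason to prefer the Monte-Carlo estimator derived next.

In [ ]:
# minimal sketch of a grid-based numerical integration of the mutual information,
# using the GMM approximations q(r|t=s_i) in place of the true densities
ngrid = 300                                        # grid resolution per dimension
gx = np.linspace(1.5*min(np.real(r)), 1.5*max(np.real(r)), ngrid)
gy = np.linspace(1.5*min(np.imag(r)), 1.5*max(np.imag(r)), ngrid)
GX, GY = np.meshgrid(gx, gy)
grid = np.column_stack((GX.ravel(), GY.ravel()))
dA = (gx[1] - gx[0]) * (gy[1] - gy[0])             # area element of the grid

P_t = np.full(len(constellation), 1/len(constellation))           # uniform priors
q_cond = np.exp(np.array([g.score_samples(grid) for g in symbol_gmms]))  # q(r|t=s_i)
q_r = P_t @ q_cond                                 # q(r) = sum_i P(t=s_i) q(r|t=s_i)

MI_grid = 0.0
for i in range(len(constellation)):
    valid = q_cond[i] > 0                          # avoid log(0) from underflow
    MI_grid += np.sum(P_t[i]*q_cond[i][valid]*np.log2(q_cond[i][valid]/q_r[valid]))*dA
print('Grid-based estimate: %1.3f bit/channel use' % MI_grid)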

In this notebook, we use the auxiliary channel method of [2]. According to [2], we can bound the mutual information as \begin{align*} I(\textsf{t};\textsf{r}) &= \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{p(r|\mathsf{t}=s_i)}{\sum_{j=1}^{16}p(r|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right)\\ &\stackrel{(a)}{\geq} \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{q(r|\mathsf{t}=s_i)}{\sum_{j=1}^{16}q(r|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right) \end{align*} where $(a)$ is due to [2] and $q(r|t)$ is an estimate of the true conditional channel probability density $p(r|t)$. The bound is tight if $q(r|t) = p(r|t)$. We can further continue \begin{align*} I(\textsf{t};\textsf{r}) &\geq \int_{r\in\mathbb{C}}\sum_{i=1}^{16}p(r,\mathsf{t}=s_i)\log_2\left(\frac{q(r|\mathsf{t}=s_i)}{\sum_{j=1}^{16}q(r|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right) \\ &= \mathbb{E}_{\textsf{r},\textsf{t}\sim p(\textsf{r},\textsf{t})}\left\{\log_2\left(\frac{q(\mathsf{r}|\mathsf{t})}{\sum_{j=1}^{16}q(\mathsf{r}|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right)\right\} \\ &\stackrel{(b)}{\approx} \frac{1}{N}\sum_{i=1}^N\log_2\left(\frac{q(r_i|t_i)}{\sum_{j=1}^{16}q(r_i|\mathsf{t}=s_j)P(\mathsf{t}=s_j)}\right) \end{align*} where in $(b)$ we use a Monte Carlo estimate of the expectation, i.e., we replace the expectation by the sample mean over the $N$ sample points that we generated. Note that we should use different samples for training the GMMs than for computing the mutual information, as we would otherwise risk severe overfitting (training set versus validation set). If $N$ is large enough, this approximation becomes accurate.

For computing $I(\textsf{r};\textsf{t})$, we rearrange the bound as follows: \begin{align*} I(\textsf{r};\textsf{t}) \gtrapprox \underline{I}(\mathsf{r};\mathsf{t}) := \frac{1}{N}\sum_{i=1}^N\left(\log_2\left(q(r_i|t_i)\right) - \log_2\left(\sum_{j=1}^{16}q(r_i|\mathsf{t}=s_j)P(\mathsf{t}=s_j)\right)\right) \end{align*} where $q(r_i|\mathsf{t}=s_j)$ is obtained from the $j$'th GMM as \begin{equation*} q(r_i|\mathsf{t}=s_j) = \sum_{k=1}^m\pi_{j,k}\mathcal{N}(\begin{pmatrix}\Re\{r_i\}&\Im\{r_i\}\end{pmatrix}^\mathrm{T}; \boldsymbol{\mu}_{j,k}, \boldsymbol{\Sigma}_{j,k}) \end{equation*} with mixture weights $\pi_{j,k}$, means $\boldsymbol{\mu}_{j,k}$, and covariance matrices $\boldsymbol{\Sigma}_{j,k}$. As we need to compute $\log_2(\sum_{k=1}^m\pi_{j,k}\mathcal{N}(\begin{pmatrix}\Re\{r_i\}&\Im\{r_i\}\end{pmatrix}^\mathrm{T}; \boldsymbol{\mu}_{j,k}, \boldsymbol{\Sigma}_{j,k}))$, we can use the fact that the function score_samples of the GaussianMixture class directly returns $\log(\sum_{k=1}^m\pi_{j,k}\mathcal{N}(\begin{pmatrix}\Re\{r_i\}&\Im\{r_i\}\end{pmatrix}^\mathrm{T}; \boldsymbol{\mu}_{j,k}, \boldsymbol{\Sigma}_{j,k}))$, i.e., the natural logarithm, which we just need to scale by $1/\log(2)$.
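As a small illustration (a sketch assuming the list symbol_gmms of per-symbol GMMs and the received samples r from the cells above), all terms $\log_2 q(r_i|\mathsf{t}=s_j)$ can be obtained from score_samples with a single scaling:

In [ ]:
# log2 q(r_i | t = s_j) for all received samples i and all symbols j;
# score_samples returns the natural logarithm, hence the division by log(2)
X_val = np.column_stack((np.real(r), np.imag(r)))
log2_q = np.array([g.score_samples(X_val) / np.log(2) for g in symbol_gmms])
print(log2_q.shape)   # (16, number of samples)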

In the following, we compute and plot the mutual information for two different constellations. The results obtained by this method are lower bounds on the mutual information, i.e., we obtain a conservative estimate of what is actually achievable. Note that we also do not optimize over the input distribution $P(\mathsf{t}=s_i)$, which we could do to further improve the achievable rate.

[2] D. M. Arnold, H.-A. Loeliger, P. O. Vontobel, A. Kavcic, and W. Zeng, "Simulation-Based Computation of Information Rates for Channels with Memory," IEEE Transactions on Information Theory, vol. 52, no. 8, Aug. 2006


In [33]:
# compute MI
def compute_MI_auxiliarymethod(m, constellation, Pin, length_train):
    # first generate a training set and use it to train the GMMs
    t_train = np.random.randint(len(constellation),size=length_train)
    r_train = simulate_channel(t_train, Pin, constellation)
    
    # train the GMMs for each symbol (i.e., the conditional channel pdf)
    symbol_gmms= train_GMMs(t_train, r_train, m, constellation)
                          
    # now generate the validation set, used to compute the mutual information 
    t = np.random.randint(len(constellation),size=length_train)
    r = simulate_channel(t, Pin, constellation)
        
    # compute the empirical priors P(t = s_k) from the data
    Px = np.zeros(len(constellation))

    X = np.column_stack( (np.real(r), np.imag(r)) )
    
    # numerator: log q(r_i | t_i), denominator: sum_j P(t=s_j) q(r_i | t=s_j)
    numerator = np.zeros(len(r))
    denominator = np.zeros(len(r))

    for k in range(len(constellation)):
        idx = t == k
        Px[k] = np.mean(idx)
        numerator[idx] = symbol_gmms[k].score_samples(X[idx,:])
        denominator += Px[k]*np.exp(symbol_gmms[k].score_samples(X))

    # score_samples returns natural logarithms, convert everything to base 2
    numerator /= np.log(2)
    denominator = np.log2(denominator)

    # guard against numerical underflow (samples with vanishing estimated density)
    numerator[np.isinf(numerator)] = 0
    denominator[np.isinf(denominator)] = 0
    numerator[np.isnan(numerator)] = 0
    denominator[np.isnan(denominator)] = 0
    return np.mean(numerator - denominator)


length_train = 64000

Pin_range = np.linspace(-15,10,num=26)

# compute mutual information for varying values of m and 16-qam
MI_varm = {2: np.zeros(len(Pin_range)), 5: np.zeros(len(Pin_range)), 10: np.zeros(len(Pin_range)), 20: np.zeros(len(Pin_range)), 30: np.zeros(len(Pin_range)), 60: np.zeros(len(Pin_range))}
MI_varconst = {'16-QAM': np.zeros(len(Pin_range)), '16-APSK': np.zeros(len(Pin_range))}

for m in MI_varm:
    print('Computing mutual information for 16-QAM and m = %d classes per point ...' % m)
    for count,Pin in enumerate(Pin_range):
        MI_varm[m][count] = compute_MI_auxiliarymethod(m, constellations['16-QAM'], Pin, length_train)    
        
        
for c_name in MI_varconst:
    print('Computing mutual information for constellation %s ...' % c_name)
    for count,Pin in enumerate(Pin_range):
        MI_varconst[c_name][count] = compute_MI_auxiliarymethod(128, constellations[c_name], Pin, length_train)


Computing mutual information for 16-QAM and m = 2 classes per point ...
Computing mutual information for 16-QAM and m = 5 classes per point ...
Computing mutual information for 16-QAM and m = 10 classes per point ...
Computing mutual information for 16-QAM and m = 20 classes per point ...
Computing mutual information for 16-QAM and m = 30 classes per point ...
Computing mutual information for 16-QAM and m = 60 classes per point ...
Computing mutual information for constellation 16-QAM ...
Computing mutual information for constellation 16-APSK ...

In [70]:
plt.figure(figsize=(14,5))
plt.subplot(121)
plt.plot(Pin_range,MI_varm[2])
plt.plot(Pin_range,MI_varm[5])
plt.plot(Pin_range,MI_varm[10])
plt.plot(Pin_range,MI_varm[20])
plt.plot(Pin_range,MI_varm[60])
plt.xlim((-15,10))
plt.ylim((2,5))
plt.yticks((2,3,4,5))
plt.grid(True)
plt.legend(['$m=2$','$m=5$','$m=10$','$m=20$','$m=60$'])
plt.title('16-QAM Transmission over $L=5000$\,km')
plt.xlabel('$P_{\\textrm{in}}$ (dBm)')
plt.ylabel('$\\underline{I}(\mathsf{r};\mathsf{t})$ (bit/channel use)')

plt.subplot(122)
plt.plot(Pin_range,MI_varconst['16-QAM'])
plt.plot(Pin_range,MI_varconst['16-APSK'])
plt.xlim((-15,10))
plt.ylim((2,5))
plt.yticks((2,3,4,5))
plt.grid(True)
plt.legend(['16-QAM', '16-APSK'])
plt.title('Transmission over $L=5000$\,km ($m=128$)')
plt.xlabel('$P_{\\textrm{in}}$ (dBm)')
plt.ylabel('$\\underline{I}(\mathsf{r};\mathsf{t})$ (bit/channel use)')
#plt.savefig('MIestimates_GMM.pdf',bbox_inches='tight')