In [7]:
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import euclidean_distances

In [4]:
X = np.random.normal(size=(10, 10))

Entropy

Based on the entropy measure from the Mitra paper. The distance between data points $p$ and $q$ is defined as:

$$D_{pq} = \left( \sum_{j=1}^M \left(\frac{x_{p,j} - x_{q,j}}{\text{max}_j - \text{min}_j}\right)^2 \right)^{1/2}$$

This is simply the Euclidean distance computed on MinMaxScaler output (verified in a cell below).

Then we will further define

$$\text{sim}(p, q) = e^{-\alpha D_{pq}}$$

where $\alpha = \frac{-\log 0.5}{\bar{D}}$ and $\bar{D}$ is the average pairwise distance over the whole dataset.

Then using this, we can calculate Entropy:

$$E = -\sum_{p=1}^l \sum_{q=1}^l \left(\text{sim}(p, q) \times \log \text{sim}(p, q) + (1-\text{sim}(p, q))\times \log(1-\text{sim}(p, q))\right)$$
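
A quick sanity check (not from the original notebook, and using a hypothetical value for $\bar{D}$): this choice of $\alpha$ pins the similarity at exactly 0.5 for a pair sitting at the average distance.

In [ ]:
# Sketch (not in the original): with alpha = -log(0.5) / D_bar, a pair at
# exactly the average distance gets similarity 0.5.
D_bar = 1.7                      # hypothetical average pairwise distance
alpha = -np.log(0.5) / D_bar
np.exp(-alpha * D_bar)           # -> 0.5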

In [6]:
mm = MinMaxScaler()
X_mm = mm.fit_transform(X)
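
As a quick check (not in the original), $D_{pq}$ computed directly from the formula should match euclidean_distances on the MinMax-scaled data:

In [ ]:
# Sketch (not in the original): the normalized distance formula above is the
# same as Euclidean distance on MinMax-scaled features.
feat_range = np.ptp(X, axis=0)                    # max_j - min_j per feature
diffs = (X[:, None, :] - X[None, :, :]) / feat_range
D_direct = np.sqrt((diffs ** 2).sum(axis=-1))
np.allclose(D_direct, euclidean_distances(X_mm))  # -> True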

In [25]:
np.exp(np.array([1,1,1,1,1,1]))


Out[25]:
array([ 2.71828183,  2.71828183,  2.71828183,  2.71828183,  2.71828183,
        2.71828183])

In [36]:
def entropy(X):
    # Scale features to [0, 1] so Euclidean distance matches D_pq above
    mm = MinMaxScaler()
    X_mm = mm.fit_transform(X)
    Dpq = euclidean_distances(X_mm)
    # Use the strict upper triangle: each pair once, no self-distances
    iu = np.triu_indices_from(Dpq, k=1)
    D_bar = Dpq[iu].mean()
    alpha = -np.log(0.5) / D_bar
    sim_pq = np.exp(-alpha * Dpq[iu])
    # Off-diagonal similarities lie in (0, 1), so both logs stay finite
    pair_entropy = sim_pq * np.log(sim_pq) + (1 - sim_pq) * np.log(1 - sim_pq)
    # Double the sum to cover the symmetric (q, p) terms of the double sum
    return -2 * np.sum(pair_entropy)

In [41]:
entropy(np.random.normal(size=(10, 2)))


Out[41]:
56.444984076641092
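
As a further experiment (not part of the original, on hypothetical data), the measure should come out lower when the data has tight cluster structure, since most pairwise similarities then sit near 0 or 1 rather than near 0.5.

In [ ]:
# Sketch (not in the original): compare entropy on uniform vs. clustered data.
# Well-separated clusters should give the lower value.
rng = np.random.RandomState(0)
X_uniform = rng.uniform(size=(50, 2))
X_clustered = np.vstack([rng.normal(0.0, 0.05, size=(25, 2)),
                         rng.normal(1.0, 0.05, size=(25, 2))])
entropy(X_uniform), entropy(X_clustered)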

In [42]:
from sklearn.mixture import BayesianGaussianMixture

In [61]:
bgm = BayesianGaussianMixture(n_components=10)

In [62]:
X = np.random.normal(size=(1000,)).reshape(-1, 1)

In [63]:
bgm.fit(X)


c:\users\chapm\anaconda3\lib\site-packages\sklearn\mixture\base.py:237: ConvergenceWarning: Initialization 1 did not converge. Try different init parameters, or increase max_iter, tol or check for degenerate data.
  % (init + 1), ConvergenceWarning)
Out[63]:
BayesianGaussianMixture(covariance_prior=None, covariance_type='full',
            degrees_of_freedom_prior=None, init_params='kmeans',
            max_iter=100, mean_precision_prior=None, mean_prior=None,
            n_components=10, n_init=1, random_state=None, reg_covar=1e-06,
            tol=0.001, verbose=0, verbose_interval=10, warm_start=False,
            weight_concentration_prior=None,
            weight_concentration_prior_type='dirichlet_process')

In [64]:
bgm.predict(X)


Out[64]:
array([1, 2, 0, 1, 1, 1, 2, 0, 0, 1, 1, 0, 1, 1, 0, 2, 1, 0, 0, 0, 0, 2, 0,
       2, 2, 2, 2, 1, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 1, 0, 0, 0, 0, 2,
       2, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 2, 0, 1, 2,
       1, 1, 2, 1, 0, 1, 2, 2, 1, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 1,
       2, 1, 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 2, 2, 1, 2, 0, 2, 0, 2, 1, 1, 0,
       1, 1, 0, 1, 2, 1, 2, 1, 2, 0, 2, 0, 2, 0, 0, 0, 1, 1, 0, 2, 1, 2, 2,
       1, 0, 2, 1, 2, 1, 0, 1, 2, 2, 1, 2, 2, 0, 2, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 2, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 0, 0, 1, 2, 0, 2, 1, 0,
       0, 0, 0, 0, 1, 2, 1, 1, 1, 2, 0, 0, 0, 0, 2, 2, 0, 0, 1, 0, 0, 2, 1,
       0, 2, 1, 2, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0, 1,
       1, 1, 2, 2, 2, 2, 0, 2, 0, 0, 1, 0, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1,
       0, 0, 2, 1, 2, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 1, 1, 2, 2, 0, 0, 0, 0,
       0, 2, 2, 1, 0, 0, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2, 1, 1, 2, 0, 2, 1, 0,
       0, 2, 1, 0, 1, 2, 2, 1, 1, 2, 2, 2, 1, 0, 0, 2, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 2, 0, 2, 2, 0, 0, 1, 0, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 1, 0, 1,
       2, 0, 0, 1, 0, 1, 2, 1, 0, 0, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 2, 0, 2, 0, 0, 1, 2, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 2,
       1, 0, 1, 0, 1, 1, 1, 0, 0, 2, 0, 1, 0, 2, 1, 2, 2, 2, 1, 1, 0, 1, 0,
       0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 0, 0, 0, 1, 1, 0,
       0, 1, 1, 0, 0, 2, 0, 1, 1, 0, 1, 2, 0, 0, 2, 1, 2, 1, 1, 2, 0, 2, 2,
       0, 2, 0, 1, 2, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 2, 1,
       1, 1, 1, 1, 0, 2, 0, 2, 2, 2, 2, 0, 0, 1, 2, 1, 1, 2, 1, 1, 1, 0, 0,
       0, 1, 2, 0, 2, 2, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0,
       1, 1, 0, 2, 0, 1, 2, 0, 2, 1, 0, 1, 2, 1, 0, 1, 0, 2, 0, 0, 1, 0, 1,
       2, 0, 1, 0, 1, 0, 2, 0, 1, 0, 0, 1, 1, 2, 0, 2, 1, 2, 0, 1, 2, 1, 0,
       2, 1, 1, 1, 0, 0, 1, 0, 0, 2, 0, 1, 2, 0, 0, 0, 2, 1, 2, 1, 2, 1, 1,
       2, 1, 0, 1, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 2, 0, 2, 2,
       0, 2, 2, 1, 1, 0, 2, 2, 0, 0, 0, 2, 1, 1, 1, 0, 0, 1, 0, 1, 0, 2, 2,
       0, 1, 0, 1, 0, 2, 2, 0, 0, 1, 2, 2, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 2, 2, 0, 0, 2, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 2, 0, 2, 1, 2, 0, 1, 2, 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, 1, 0, 2,
       2, 0, 2, 0, 1, 0, 1, 2, 2, 1, 2, 2, 1, 2, 2, 0, 1, 2, 2, 0, 1, 1, 0,
       1, 2, 0, 0, 0, 2, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1, 2, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 0, 1, 1, 0, 2, 1, 1, 0, 0, 0, 1,
       2, 0, 2, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 1, 2, 2, 0, 2,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 2, 0, 2, 1, 1, 2, 1, 0, 2,
       0, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 1, 2, 1, 0, 2, 2, 0, 0, 1, 0, 0, 0,
       2, 1, 0, 1, 0, 0, 0, 2, 0, 2, 1, 2, 1, 1, 0, 0, 0, 2, 0, 1, 0, 1, 2,
       2, 1, 0, 0, 1, 1, 1, 2, 1, 0, 2, 1, 2, 1, 2, 0, 1, 0, 0, 1, 0, 1, 2,
       1, 2, 1, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 2, 1, 1, 0, 1, 0, 2, 1,
       0, 2, 0, 0, 2, 0, 0, 2, 0, 1, 1, 2, 1, 1, 0, 0, 2, 0, 1, 2, 2, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 2, 2, 0, 0, 2, 1, 2, 1, 0, 1, 0, 0, 1, 1, 1,
       0, 0, 2, 1, 0, 0, 1, 0, 0, 1, 1], dtype=int64)
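
One last check (not in the original): the predicted labels above only use a few of the 10 components, which is what the Dirichlet-process prior tends to do on data drawn from a single Gaussian. Inspecting bgm.weights_ shows how much mass the remaining components actually carry; increasing max_iter is the usual way to address the ConvergenceWarning from the fit.

In [ ]:
# Sketch (not in the original): most of the 10 components should carry
# negligible weight, since X comes from a single Gaussian.
labels = bgm.predict(X)
np.unique(labels), np.round(bgm.weights_, 3)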