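Entropy is a measure of the impurity of a node: the higher the entropy, the higher the node's degree of impurity. For a node $t$, let $p(j|t)$ denote the fraction of the samples at $t$ that belong to class $j$; then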
$$Entropy = -\displaystyle \sum_j p(j|t) \log p(j|t)$$
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas
%matplotlib inline
In [32]:
# Build a two-class toy dataset: 200 points per class, each class drawn from a
# 2-D Gaussian. Both classes share the covariance `sigma` and differ only in mean.
# A locally seeded generator keeps the sample (and the figure) identical across
# Restart & Run All without touching NumPy's global random state.
rng = np.random.RandomState(0)

sigma = np.array([[0.14, -0.1], [-0.1, 0.14]])
mu1 = np.array([-0.4, 0.2])
mu2 = np.array([0.4, 0.5])
d1 = rng.multivariate_normal(mean=mu1, cov=sigma, size=200)
d2 = rng.multivariate_normal(mean=mu2, cov=sigma, size=200)

# Stack the two samples column-wise: first coordinate -> x1, second -> x2.
x1 = np.concatenate((d1[:, 0], d2[:, 0]))
x2 = np.concatenate((d1[:, 1], d2[:, 1]))

# Rows coming from d1 are labelled 'Positive', rows from d2 'Negative'.
labels = np.concatenate((['Positive'] * d1.shape[0],
                         ['Negative'] * d2.shape[0]))

df = pandas.DataFrame(dict(x1=x1, x2=x2, y=labels))
# Shuffle the rows so the two classes are interleaved rather than stored back to back.
df = df.reindex(rng.permutation(df.index))

# x and y must be passed by keyword: seaborn >= 0.12 rejects them as positionals.
# fit_reg=False turns lmplot into a plain scatter plot coloured by class.
sns.lmplot(x="x1", y="x2", hue="y", data=df, fit_reg=False);
Out[32]:
In [28]:
Out[28]:
In [ ]: