https://stats.stackexchange.com/questions/how-do-i-quantify-the-uniformity-of-sampling-time
In [1]:
# %pylab injects numpy/matplotlib names into the global namespace (the later
# cells rely on the `np` and `plt` aliases it provides) and turns on inline
# figures.
# NOTE(review): this behaves like a star-import and also shadows builtins such
# as `sum` with the numpy versions; explicit `import numpy as np` /
# `import matplotlib.pyplot as plt` would make the dependencies visible --
# confirm no cell depends on other injected names before switching.
%pylab inline
# Make `/` true division on Python 2 (the entropy loop below computes 1/n).
from __future__ import division
import scipy as sp  # NOTE(review): `sp` is not referenced in the visible cells
import seaborn as sns
# Plot styling only; no effect on the numbers computed below.
sns.set_style('whitegrid')
sns.set_context('paper', font_scale=2)
# Seed NumPy's global RNG so the np.random.uniform sample is reproducible.
np.random.seed(42)
In [2]:
def calculate_shannon_entropy(p):
    '''Compute the Shannon entropy, in bits, of a discrete distribution.

    Parameters
    ----------
    p: list
       list of probability values such that sum(p) = 1

    Returns
    -------
    entropy: float
             Shannon Entropy, -sum(p_i * log2(p_i)). Entries that are not
             strictly positive contribute nothing (0 * log2(0) is taken as 0).

    Raises
    ------
    AssertionError
        If the probabilities do not sum to 1 (within np.allclose tolerance).
    '''
    probabilities = np.asarray(p, dtype=float)
    assert np.allclose(probabilities.sum(), 1), 'probabilities must sum to 1'
    # Drop non-positive entries *before* taking the log: log2(0) is -inf and
    # 0 * -inf is NaN, which previously had to be hidden with nansum and
    # emitted RuntimeWarnings. The old `.T` was a no-op for 1-D input and
    # would have broadcast to an outer product for column vectors.
    positive = probabilities[probabilities > 0]
    entropy = -np.sum(positive * np.log2(positive))
    # Adding 0.0 turns a possible -0.0 (e.g. for p = [1]) into 0.0.
    return entropy + 0.0
def calculate_uniformity_index(x, a, b, bins=10):
    '''Estimate the uniformity index of the samples ``x`` on the interval [a, b].

    The index follows the definition quoted in this notebook from
    https://arxiv.org/pdf/1508.01146.pdf:

        U(f_X) = sqrt(1 + (b - a)^2) / integral_a^b sqrt(1 + f_X(t)^2) dt

    The density f_X is estimated with a histogram restricted to [a, b] and the
    integral is approximated by a Riemann sum over the histogram bins. A flat
    estimated density gives U = 1.

    Parameters
    ----------
    x: array-like
       sample values; values outside [a, b] are ignored by the histogram
    a: float
       lower end of the interval
    b: float
       upper end of the interval, must be greater than ``a``
    bins: int or sequence, optional
       passed to ``np.histogram`` (default 10)

    Returns
    -------
    uniformity: float
                Estimated uniformity index.

    Raises
    ------
    ValueError
        If ``b <= a`` or if no sample lies inside [a, b].
    '''
    if not b > a:
        raise ValueError('b must be greater than a')
    samples = np.asarray(x, dtype=float).ravel()
    inside = (samples >= a) & (samples <= b)
    if not inside.any():
        # density=True would divide by zero and return NaNs.
        raise ValueError('x has no samples inside [a, b]')
    density, edges = np.histogram(samples, bins=bins, range=(a, b), density=True)
    bin_widths = np.diff(edges)
    # Riemann sum for the integral of sqrt(1 + f_X(t)^2) over [a, b].
    denominator = np.sum(bin_widths * np.sqrt(1 + density ** 2))
    return np.sqrt(1 + (b - a) ** 2) / denominator
In [3]:
# Entropy of a two-outcome distribution [p, 1 - p] for p = 0, 0.1, ..., 1.
# np.linspace yields exactly 11 evenly spaced points including both endpoints;
# np.arange with a fractional step does not guarantee that, and the grid was
# previously built twice.
p_grid = np.linspace(0, 1, 11)
binary_entropy = [calculate_shannon_entropy([p, 1 - p]) for p in p_grid]
plt.xlabel('p')
plt.ylabel('Shannon entropy (bits)')
plt.plot(p_grid, binary_entropy)
Out[3]:
In [4]:
# Entropy of the discrete uniform distribution over n outcomes, n = 1..99.
ns = list(range(1, 100))
# 1.0 / n keeps this true division even without the __future__ import.
entropies = [calculate_shannon_entropy([1.0 / n] * n) for n in ns]
# Plot against n itself; plotting the bare list put index n - 1 on the x axis.
plt.xlabel('number of equiprobable outcomes n')
plt.ylabel('Shannon entropy (bits)')
plt.plot(ns, entropies)
Out[4]:
In [31]:
## Uniform index
hist_bins = 20  # NOTE(review): not used in the visible cells
s = np.random.uniform(1, 10, 1000)
# `normed` was removed from matplotlib's hist; `density=True` is the replacement.
count, bins, ignored = plt.hist(s, 10, density=True)
count, boundaries = np.histogram(s, bins=1000)
normalized_count = count/np.sum(count)
# normalized_count holds per-bin probability masses, not a density. Divide by
# the bin width to estimate f_X, then weight each term by the bin width so the
# sum approximates the integral of sqrt(1 + f_X(t)^2) dt.
bin_widths = np.diff(boundaries)
density = normalized_count/bin_widths
U = np.sqrt(1+(10-1)**2)/np.sum(bin_widths*np.sqrt(1+density**2))
# NOTE(review): 0.707 is approximately 1/sqrt(2); presumably meant as the
# minimum of U, which would only hold for b - a = 1 -- confirm against the paper.
(U-0.707)/(1-0.707)
Out[31]:
https://arxiv.org/pdf/1508.01146.pdf defines the uniformity index of a density $f_X$ supported on $[a, b]$ as: $$ \mathcal{U}(f_X) = \frac{\sqrt{1+(b-a)^2}}{\int_a^b \sqrt{1+[f_X(t)]^2}\,dt} $$
In [43]:
# Histogram of s as a probability density over 1000 bins (bar heights times
# bin widths sum to 1); `count` stands in for f_X in the cells that follow.
count, boundaries = np.histogram(s, 1000, density=True)
In [48]:
# Sanity check: density times bin width, summed over all bins, should be 1.
np.sum(count * np.diff(boundaries))
Out[48]:
In [50]:
# Riemann-sum approximation of the integral of sqrt(1 + f_X(t)^2) dt, using
# the histogram density as f_X and the bin widths as dt.
# (np.trapz is not used here: its second argument is the sample positions,
# not the bin widths.)
denom = np.sum(np.sqrt(count**2 + 1) * np.diff(boundaries))
denom
Out[50]:
In [51]:
# Uniformity index: sqrt(1 + (b - a)^2) with a = 1, b = 10, over the
# integral estimate held in `denom`.
U = np.sqrt((10 - 1)**2 + 1) / denom
# Rescale U using 0.707 as the lower reference value.
# NOTE(review): 0.707 is approximately 1/sqrt(2); presumably the minimum of U,
# which would only hold for b - a = 1 -- confirm against the paper.
(U - 0.707) / (1 - 0.707)
Out[51]:
In [52]:
# Overwrite, in place, every sample strictly between 7 and 9 with 0.
# NOTE(review): 0 lies outside the [1, 10) range the sample was drawn from, so
# later np.histogram calls without an explicit range start at 0 while the
# formula still uses b - a = 10 - 1 -- confirm this is intended.
s[(s > 7) & (s < 9)] = 0
In [54]:
# Re-estimate the density of the modified sample and compute U from it.
# U was previously computed from `normalized_count`, a leftover of an earlier
# cell evaluated before `s` was modified, so the fresh histogram was ignored.
count, boundaries = np.histogram(s, bins=1000, density=True)
# Riemann sum for the integral of sqrt(1 + f_X(t)^2) dt.
denom = np.sum(np.diff(boundaries)*np.sqrt(1+count**2))
U = np.sqrt(1+(10-1)**2)/denom
In [56]:
# Integral estimate (density-based Riemann sum) for the current histogram.
denom = np.sum(np.sqrt(count**2 + 1) * np.diff(boundaries))
# Uniformity index with a = 1, b = 10, then rescaled against 0.707.
U = np.sqrt((10 - 1)**2 + 1) / denom
(U - 0.707) / (1 - 0.707)
Out[56]:
In [ ]:
# Number of histogram bins currently held in `count`.
count.shape[0]
In [58]:
# Overwrite, in place, every sample greater than 1 with 0.
# NOTE(review): the sample was drawn from [1, 10), so if the cells ran in the
# order shown this zeroes essentially every element; the later use of (2 - 1)
# suggests a different threshold may have been intended -- confirm.
s[s > 1] = 0
# `normed` was removed from matplotlib's hist; `density=True` is the replacement.
count, bins, ignored = plt.hist(s, 10, density=True)
In [64]:
# Density histogram of the current sample over 10 bins.
count, boundaries = np.histogram(s, 10, density=True)
# Riemann sum for the integral of sqrt(1 + f_X(t)^2) dt.
denom = np.sum(np.sqrt(count**2 + 1) * np.diff(boundaries))
# Uniformity index using an interval width of 2 - 1 = 1, rescaled against 0.707.
U = np.sqrt((2 - 1)**2 + 1) / denom
(U - 0.707) / (1 - 0.707)
Out[64]:
In [60]:
# Display whatever value `denom` currently holds in the kernel.
# NOTE(review): this cell's execution count (60) is lower than the cell above
# it (64), so the value shown depends on the order the cells were run in.
denom
Out[60]:
In [62]:
# Density-normalised 10-bin histogram of the current sample.
# `normed` was removed from matplotlib's hist; `density=True` is the replacement.
count, bins, ignored = plt.hist(s, 10, density=True)
In [ ]: