In [ ]:
import bet.postProcess.compareP as compP
from helpers import *
import matplotlib.pyplot as plt
In [ ]:
# Both sample sets start with the same number of samples.
num_samples_left = num_samples_right = 50
delta = 0.5  # width of measure's support per dimension

# Build the left set first and the right set second (same call order as
# writing the two assignments out), each from unit_center_set in 2 dimensions.
L, R = (
    unit_center_set(2, count, delta)
    for count in (num_samples_left, num_samples_right)
)
In [ ]:
# Scatter the left sample set over the unit square, coloring every point by
# the probability stored on the set.
plt.scatter(x=L._values[:, 0], y=L._values[:, 1], c=L._probabilities)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()
In [ ]:
# Scatter the right sample set over the unit square, coloring every point by
# the probability stored on the set.
plt.scatter(x=R._values[:, 0], y=R._values[:, 1], c=R._probabilities)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()
In [ ]:
# Number of emulation samples handed to compP.compare below.
num_emulation_samples = 2000
# Build the comparison object from the left (L) and right (R) sample sets.
mm = compP.compare(L, R, num_emulation_samples) # initialize metric
In [ ]:
# mm.get_left().get_values()
In [ ]:
# mm.get_right().get_values()
We are going to create a comparison object whose sample sets are proper subsets of the originals (we will halve the number of samples in each). However, since the implicitly defined Voronoi cells that constitute the $\sigma$-algebra are going to be fundamentally different, we observe that the two density estimates reflect the differences in geometry.
Our chosen densities are uniform and centered in the middle of the domain. The integration (emulation) sample set is copied during the clipping procedure by default; pass copy=False to clip if you prefer the two comparisons to remain linked.
In [ ]:
# Cut both sample sets in half: clip the comparison `mm`, passing half of each
# original sample count (integer division), and keep the result as `mc`.
mc = mm.clip(num_samples_left//2,num_samples_right//2)
In [ ]:
# mc.get_left().get_values()
In [ ]:
# mc.get_right().get_values()
Observe how these are distinctly different objects in memory:
In [ ]:
# Bare tuple expression: as the last statement of a notebook cell, its repr
# is displayed, showing the full (mm) and clipped (mc) comparison objects.
mm, mc
In [ ]:
# Density estimates from the full comparison `mm`, unpacked as a pair; the
# names and the later plot titles treat them as (left, right).
ld1,rd1 = mm.estimate_density()
In [ ]:
# Values of the emulated sample set held by the clipped comparison `mc`;
# the plots below use columns 0 and 1 as x/y coordinates.
I = mc.get_emulated().get_values()
In [ ]:
# Emulated points, colored by the right density estimate of the full
# comparison (rd1), drawn small and semi-transparent.
# NOTE(review): I comes from mc while rd1 comes from mm — presumably the
# emulated points coincide because clip copies them by default; confirm.
plt.scatter(x=I[:, 0], y=I[:, 1], s=10, c=rd1, alpha=0.5)
# Overlay the samples of the original right set as larger black markers.
plt.scatter(x=R._values[:, 0], y=R._values[:, 1], s=50, c='k', marker='o')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title("Right Density")
plt.show()
In [ ]:
# Emulated points, colored by the left density estimate of the full
# comparison (ld1), drawn small and semi-transparent.
# NOTE(review): I comes from mc while ld1 comes from mm — presumably the
# emulated points coincide because clip copies them by default; confirm.
plt.scatter(x=I[:, 0], y=I[:, 1], s=10, c=ld1, alpha=0.5)
# Overlay the samples of the original left set as larger black markers.
plt.scatter(x=L._values[:, 0], y=L._values[:, 1], s=50, c='k', marker='o')
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title("Left Density")
plt.show()
In [ ]:
# Density estimates from the clipped comparison `mc`, unpacked as a pair; the
# names and the later plot titles treat them as (left, right).
ld2,rd2 = mc.estimate_density()
In [ ]:
# Emulated points, colored by the right density estimate of the clipped
# comparison (rd2), drawn small and semi-transparent.
plt.scatter(x=I[:, 0], y=I[:, 1], s=10, c=rd2, alpha=0.5)
# Overlay the samples of the clipped right set as larger black markers.
plt.scatter(
    x=mc.get_right()._values[:, 0],
    y=mc.get_right()._values[:, 1],
    s=50,
    c='k',
    marker='o',
)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title("Right Density")
plt.show()
In [ ]:
# Emulated points, colored by the left density estimate of the clipped
# comparison (ld2), drawn small and semi-transparent.
plt.scatter(x=I[:, 0], y=I[:, 1], s=10, c=ld2, alpha=0.5)
# Overlay the samples of the clipped left set as larger black markers.
plt.scatter(
    x=mc.get_left()._values[:, 0],
    y=mc.get_left()._values[:, 1],
    s=50,
    c='k',
    marker='o',
)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title("Left Density")
plt.show()
In [ ]:
from scipy.stats import entropy as kl_div

# Swap in two larger (1000-sample) sets; the left one uses half the support
# width of the right one.
mm.set_left(unit_center_set(2, 1000, delta / 2))
mm.set_right(unit_center_set(2, 1000, delta))

# Evaluate the comparison with a callable (scipy's entropy) and with several
# string-named metrics, then print all eight values as one list, in the same
# order as before.
# NOTE(review): some names look like aliases of one metric ('tv'/'totvar',
# 'hell'/'hellinger') — confirm against the BET documentation.
print(
    [mm.value(kl_div)]
    + [mm.value(name) for name in ('tv', 'totvar')]
    + [mm.value('mink', w=0.5, p=1)]
    + [mm.value(name) for name in ('norm', 'sqhell', 'hell', 'hellinger')]
)
In [ ]:
import ipywidgets as wd
In [ ]:
def show_clip(samples=100, delta=0.5):
    """Plot a freshly generated sample set colored by its density.

    Seeds NumPy's global random generator with 121, builds a sample set with
    ``unit_center_set(2, samples, delta)``, runs ``compP.density`` on it and
    scatters the first two columns of its values, colored by the flattened
    ``_density`` attribute, in a new figure.

    Parameters
    ----------
    samples
        Forwarded as the second argument of ``unit_center_set``.
    delta
        Forwarded as the third argument of ``unit_center_set``.
    """
    # Fixed seed so repeated calls with the same arguments match.
    np.random.seed(121)

    sample_set = unit_center_set(2, samples, delta)
    # Presumably populates sample_set._density in place — the result of the
    # call is not used, only the attribute read below.
    compP.density(sample_set)

    xs, ys = sample_set._values[:, 0], sample_set._values[:, 1]
    colors = sample_set._density.ravel()

    plt.figure()
    plt.scatter(xs, ys, c=colors)
    plt.show()
In [ ]:
# Interactive version of show_clip: following ipywidgets' interact
# abbreviations, the (min, max) tuple for `samples` and the (min, max, step)
# tuple for `delta` are turned into sliders that re-run the function.
wd.interact(show_clip, samples=(20,500), delta=(0.05,1,0.05))
Below, we show an example of using the comparison object to get a better picture of the sets defined above, without necessarily needing to compare two measures.
In [ ]:
import scipy.stats as sstats
In [ ]:
def show_clipm(samples=100, delta=0.5):
    """Plot the emulated density of a sample set with Gaussian-shaped weights.

    Seeds NumPy's global random generator with 121, builds a sample set with
    ``unit_center_set(2, samples, delta)`` and replaces its probabilities with
    the product of two normal pdfs (mean 0.5, standard deviation ``delta``)
    evaluated at each sample's two coordinates, multiplied by ``_volumes``.
    The set is then used as the left set of a ``compP.comparison`` built on
    the emulated set of the notebook-level comparison ``mm``, and the left
    density estimate is scattered over the emulated points.

    Parameters
    ----------
    samples
        Forwarded as the second argument of ``unit_center_set``.
    delta
        Forwarded as the third argument of ``unit_center_set`` and also used
        as the standard deviation of the normal weights.
    """
    # Fixed seed so repeated calls with the same arguments match.
    np.random.seed(121)
    sample_set = unit_center_set(2, samples, delta)

    # Alternative probabilities: the same 1-D normal applied to each
    # coordinate, multiplied together and scaled by the stored volumes.
    # NOTE(review): the weights are not normalized here — confirm whether
    # set_probabilities or downstream code expects them to sum to one.
    marginal = sstats.distributions.norm(0.5, delta)
    weights = (
        marginal.pdf(sample_set._values[:, 0])
        * marginal.pdf(sample_set._values[:, 1])
    )
    sample_set.set_probabilities(weights * sample_set._volumes)

    # Reads the notebook-level comparison `mm` for its emulated set; only a
    # left set is supplied (the right set is None).
    emulated = mm.get_emulated()
    comparison = compP.comparison(emulated, sample_set, None)
    comparison.estimate_density_left()

    plt.figure()
    plt.scatter(
        emulated._values[:, 0],
        emulated._values[:, 1],
        c=sample_set._emulated_density.ravel(),
    )
    # Mark the mean of the normal weights.
    plt.scatter([0.5], [0.5], marker='x')
    plt.show()
In [ ]:
# Interactive version of show_clipm: following ipywidgets' interact
# abbreviations, the (min, max) tuple for `samples` and the (min, max, step)
# tuple for `delta` are turned into sliders that re-run the function.
wd.interact(show_clipm, samples=(20,500), delta=(0.1,1,0.05))
Change num_emulation_samples near the top of the notebook, then re-run the notebook. Try changing the values of delta both above and in the interactive examples. Notice how our approximation error is more pronounced when delta is large.
Try setting S._probabilities with S.set_probabilities() to something non-uniform.
Try passing S.clip(samples//2) as the second argument to compP.comparison in the second interactive example and either replacing estimate_density_left with estimate_density or simply adding estimate_density_right() below. Plot the resulting right density estimate either as a separate subplot or on the same axes.
In [ ]: