In [1]:
%matplotlib inline
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
In [2]:
# Seed NumPy's global RNG so a fresh "Restart & Run All" regenerates exactly
# the same data set (and therefore the same plots and fitted density).
SEED = 0
np.random.seed(SEED)

# Synthetic 2-D data: 100 points drawn uniformly and scaled by 100, plus 25
# points drawn uniformly and scaled by 5, i.e. a small dense cluster in the
# corner near the origin on top of a sparse background.
values = np.concatenate((np.random.rand(100, 2) * 100, np.random.rand(25, 2) * 5))

# Observed [min, max] extent of the data along each axis.
x_lim = [np.min(values[:, 0]), np.max(values[:, 0])]
y_lim = [np.min(values[:, 1]), np.max(values[:, 1])]
In [3]:
# Overlay the marginal distributions of the two coordinates on one set of
# axes, labelled so the two curves can be told apart.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11; once the
# environment's seaborn version is confirmed, migrate to
# sns.histplot(..., kde=True, stat="density").
ax = sns.distplot(values[:, 0], label='x')
sns.distplot(values[:, 1], label='y', ax=ax)
ax.set(title='Marginal distributions of the generated points',
       xlabel='coordinate value', ylabel='density')
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();
Out[3]:
In [4]:
# Scatter of the generated points (markers only, no connecting line) on an
# explicit figure/axes pair, with a title and axis labels so the figure
# stands on its own.
fig, ax = plt.subplots()
ax.plot(values[:, 0], values[:, 1], 'o')
# Trailing semicolon suppresses the repr of the returned artists.
ax.set(title='Generated points', xlabel='x', ylabel='y');
Out[4]:
In [5]:
# Fit a kernel density estimate to the generated points, using the
# estimator's default settings. fit() returns the estimator itself, so
# construction and fitting are chained; the bare name on the last line keeps
# the estimator repr as the cell output.
density = KernelDensity().fit(values)
density
Out[5]:
In [6]:
# Draw 10 new points from the fitted density. A fixed random_state makes the
# draw repeatable, so re-running this cell shows the same table instead of a
# fresh random one each time.
samples = density.sample(10, random_state=0)

# Log-density the fitted model assigns to each drawn point.
scores = density.score_samples(samples)

# One row per drawn point: its coordinates and its score.
df = pd.DataFrame({'x': samples[:, 0], 'y': samples[:, 1], 'score': scores})
df
Out[6]:
In [7]:
# Find the drawn sample with the lowest score and report its position,
# coordinates and score.
argmin_score = scores.argmin()
min_sample, min_score = samples[argmin_score], scores[argmin_score]
print("Min idx: %d, sample: %s, score: %.2f" % (argmin_score, min_sample, min_score))