In [5]:
%pylab inline
numbers = np.random.normal(0,1,100000000)
In [11]:
def get_ecdf(data):
    """Compute the empirical cumulative distribution function of ``data``.

    Parameters
    ----------
    data : array_like
        Sample values. Input of any shape is flattened, so every element
        counts as one observation.

    Returns
    -------
    x_sorted : numpy.ndarray
        1-D array of the observations in ascending order.
    y : numpy.ndarray
        1-D float array of the same length as ``x_sorted``; ``y[i]`` is
        ``(i + 1) / n``, the fraction of observations ``<= x_sorted[i]``.
        Both arrays are empty when ``data`` is empty.
    """
    # Flatten first: np.sort only sorts along the last axis while .size counts
    # every element, so multi-dimensional input would otherwise produce x and
    # y arrays of mismatched shapes.
    x_sorted = np.sort(np.ravel(data))
    n = x_sorted.size
    # A float arange keeps this a true division regardless of the
    # interpreter's integer-division rules.
    y = np.arange(1, n + 1, dtype=float) / n
    return x_sorted, y
In [12]:
# Compute the ECDF of the sample: x holds the sorted values and y the
# matching cumulative fractions (i + 1) / n.
x, y = get_ecdf(numbers)
In [13]:
# Drawing one marker per observation is impractical for a very large sample.
# x is already sorted, so keeping every `stride`-th point preserves the shape
# of the curve while bounding the number of markers drawn.
MAX_PLOT_POINTS = 10000
stride = max(1, x.size // MAX_PLOT_POINTS)

fig, ax = plt.subplots()
ax.scatter(x=x[::stride], y=y[::stride], s=4)
ax.set(xlabel="value", ylabel="cumulative fraction", title="Empirical CDF");
Out[13]:
In [20]:
# Pass the population size directly: np.random.choice(n, size=k) draws k
# indices from range(n) without first materialising np.arange(x.size), which
# would allocate a second array as long as the data itself.
# Sampling is with replacement (the default), so an index may repeat.
sampled_index = np.random.choice(x.size, size=100)

fig, ax = plt.subplots()
ax.scatter(x=x[sampled_index], y=y[sampled_index])
ax.set(xlabel="value", ylabel="cumulative fraction",
       title="Empirical CDF (100 randomly sampled points)");
Out[20]:
In [16]:
# Number of sampled indices. `sampled_points` is not defined in any visible
# cell and would raise NameError on a fresh kernel; the sampling cell binds
# `sampled_index`, so that is presumably the intended name.
len(sampled_index)
Out[16]:
In [ ]: