In [5]:
%pylab inline

numbers = np.random.normal(0,1,100000000)


Populating the interactive namespace from numpy and matplotlib

In [11]:
def get_ecdf(data):
    x_sorted = np.sort(data)
    n = x_sorted.size
    y = np.arange(1, n+1) / n
    return x_sorted, y

In [12]:
x, y = get_ecdf(numbers)

In [13]:
plt.scatter(x=x, y=y)


Out[13]:
<matplotlib.collections.PathCollection at 0x2b2e57d5f9b0>

In [20]:
sampled_index = np.random.choice(np.arange(x.size), size=100)
plt.scatter(x=x[sampled_index], y=y[sampled_index])


Out[20]:
<matplotlib.collections.PathCollection at 0x2b2e57ac0cc0>

In [16]:
len(sampled_points)


Out[16]:
1000

In [ ]: