In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

In [2]:
import matplotlib
#import seaborn
#seaborn.set()

In [3]:
def maxn_sample(n):
    """Return the largest of ``n`` uniform random draws.

    Parameters
    ----------
    n : int
        Number of variates to draw with ``np.random.random``.

    Returns
    -------
    numpy.float64
        The maximum of the ``n`` draws.

    Raises
    ------
    ValueError
        If ``n`` is 0, because an empty array has no maximum.
    """
    # Reduce inside NumPy rather than iterating the array with the builtin max().
    return np.random.random(n).max()

In [4]:
N = 10000  # number of max-of-n samples to simulate
n = 3      # uniform draws that go into each sample
SEED = 0   # arbitrary but fixed, so a fresh kernel reproduces the same numbers
np.random.seed(SEED)
# Column 'data' holds N values, each the maximum of n uniform draws.
df = pd.DataFrame({'data': [maxn_sample(n) for _ in range(N)]})

In [5]:
# Histogram of the simulated maxima stored in column 'data'.
df['data'].plot(kind='hist')


<matplotlib.figure.Figure at 0xaffcf76c>
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0xaffcffec>

In [6]:
# One fresh uniform draw per row.
df['sample'] = np.random.random(N)
# Use bracket access here: `df.sample` is the DataFrame.sample method, not the
# 'sample' column, so `df.sample ** ...` raises TypeError on current pandas.
# 1.0 / n keeps the exponent a true fraction even under integer division.
df['three'] = df['sample'] ** (1.0 / n)

In [7]:
# Step-outline histograms of the columns of df, drawn on shared axes for comparison.
df.plot.hist(histtype='step')


<matplotlib.figure.Figure at 0xaffff54c>
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0xaffe8c8c>

Theorem:

If $x^n$ is a uniform random variate on $[0, 1]$, then the distribution of $x$ is the same as that of $\max(a_1, \ldots, a_n)$, where the $a_i$ are independent uniform variates on $[0, 1]$.


In [8]:
import collections

In [9]:
import sys

# Machine-specific directory; presumably where the `everest` package imported
# in the next cell lives -- adjust for your own checkout.
DEVEL_DIR = "/home/jng/devel"
# Guard the append so re-running this cell does not add duplicate entries.
if DEVEL_DIR not in sys.path:
    sys.path.append(DEVEL_DIR)

In [10]:
from everest import ladybower

In [11]:
# Pairs (1, 0), (2, 1), ..., (100, 99).
# Presumably (weight, item) -- confirm against ladybower.WeightedReservoir.
data = list(zip(range(1, 101), range(100)))
# Alternative kept for experimentation: first elements take only the values 1.0, 2.0, 3.0.
#data = [(1 + (x//33.333), x) for x in range(100)]
res = ladybower.WeightedReservoir(data)

In [12]:
# Smoke test of the sampler; the recorded output is a one-element array.
# Presumably the argument is the number of items to draw -- confirm in ladybower.
res.isample_without_replacement(1)


Out[12]:
array([52])

In [13]:
N_TRIALS = 10000   # number of sampling rounds
SAMPLE_SIZE = 10   # value passed to the sampler on every round

# Tally how often each returned element shows up across all rounds.
counter = collections.Counter()

for trial in range(N_TRIALS):
    sample = res.isample_without_replacement(SAMPLE_SIZE)
    counter.update(sample)

In [14]:
from matplotlib import pyplot

In [15]:
# `keys` stays a view of the Counter's keys; `values` lists the matching
# counts in the same order.
keys = counter.keys()
values = list(counter.values())

In [16]:
# Total number of tallied draws, shown next to 1 + 2 + ... + 100
# (the sum of the first tuple elements in `data`).
sum(values), sum(range(1, 101))


Out[16]:
(100000, 5050)

In [17]:
# Materialize the keys as a plain list before plotting.
keys = list(keys)

In [18]:
# Scatter of each sampled element (x) against the number of times it was drawn (y).
pyplot.scatter(keys, values)


<matplotlib.figure.Figure at 0xaff3d24c>
Out[18]:
<matplotlib.collections.PathCollection at 0xafe5292c>

In [18]: