In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
In [2]:
import matplotlib
#import seaborn
#seaborn.set()
In [3]:
def maxn_sample(n):
    """Return the maximum of ``n`` uniform random draws.

    Parameters
    ----------
    n : int
        Number of values drawn with ``np.random.random``; must be at least 1.

    Returns
    -------
    numpy.float64
        The largest of the ``n`` drawn values.

    Raises
    ------
    ValueError
        If ``n`` is 0, because an empty sample has no maximum.
    """
    # Reduce inside NumPy rather than iterating the array with builtin max().
    return np.random.random(n).max()
In [4]:
# N: number of simulated maxima; n: how many uniforms go into each maximum.
N = 10000
n = 3
# One column, 'data', holding N independent calls to maxn_sample(n).
df = pd.DataFrame({'data': [maxn_sample(n) for _i in range(N)]})
In [5]:
# Histogram of the simulated maxima stored in the 'data' column.
df['data'].plot.hist()
Out[5]:
In [6]:
# Second route to the same distribution: draw N plain uniforms and take
# their n-th root.
df['sample'] = np.random.random(N)
# Bracket access is required: ``df.sample`` resolves to the DataFrame.sample
# method rather than the 'sample' column, so ``df.sample ** ...`` raises
# TypeError. ``1.0 / n`` keeps the exponent a float under integer division.
df['three'] = df['sample'] ** (1.0 / n)
In [7]:
# Outline ("step") histograms of the frame's columns on shared axes, so the
# distributions can be compared by eye.
df.plot.hist(histtype='step')
Out[7]:
Theorem:
If $x^n$ is a uniform random variate on $[0, 1]$, then the distribution of $x$ is the same as that of $\max(a_1, \ldots, a_n)$, where the $a_i$ are independent uniform variates on $[0, 1]$.
In [8]:
import collections
In [9]:
import os
import sys

# Extra directory added to the module search path. Override it by setting the
# EVEREST_DEVEL_DIR environment variable; the default keeps the original
# hard-coded location.
EVEREST_DEVEL_DIR = os.environ.get("EVEREST_DEVEL_DIR", "/home/jng/devel")
# Guard so that re-running this cell does not pile up duplicate entries.
if EVEREST_DEVEL_DIR not in sys.path:
    sys.path.append(EVEREST_DEVEL_DIR)
In [10]:
from everest import ladybower
In [11]:
# Pairs (value + 1, value) for value in 0..99. Presumably (weight, item), so
# that larger items carry larger weights -- confirm against
# ladybower.WeightedReservoir.
data = [(value + 1, value) for value in range(100)]
# Alternative input kept for reference: the first element takes only the
# values 1.0, 2.0 and 3.0.
#data = [(1 + (x//33.333), x) for x in range(100)]
res = ladybower.WeightedReservoir(data)
In [12]:
# Call the sampler once with an argument of 1 to inspect what it returns
# (presumably a one-element sample -- confirm against ladybower).
res.isample_without_replacement(1)
Out[12]:
In [13]:
# Tally every element returned over 10000 repeated calls to
# isample_without_replacement(10).
counter = collections.Counter()
for _trial in range(10000):
    counter.update(res.isample_without_replacement(10))
In [14]:
from matplotlib import pyplot
In [15]:
# Split the tally into the distinct sampled elements and their counts;
# both come out in the counter's own iteration order, so they stay aligned.
keys = counter.keys()
values = list(counter.values())
In [16]:
# Show the total number of tallied elements next to 1 + 2 + ... + 100 (= 5050).
sum(values), sum(range(1, 101))
Out[16]:
In [17]:
# Materialise the keys as a plain list before handing them to the plot.
keys = list(keys)
In [18]:
# Scatter of how often each element was drawn (y) against the element (x).
pyplot.scatter(x=keys, y=values)
Out[18]:
In [18]: