In [11]:
## implement weighted random sampling
import numpy as np
# only function needed is "random_sample" ... replace with sklearn version?
def weighted_sample(a, N_samples, p, replace=True):
# a is array from which we are sampliing
# N_samples is the number of samples
# p is the vector of probabilities
if len(a) != len(p):
raise ValueError("probability vector must have the same length as array")
if np.abs(sum(p) - 1) >= 10**(-15):
raise ValueError("probabilities must sum to 1")
if replace: # sample with replacement
cdf = np.zeros(np.size(p))
for i in range(len(a)):
cdf[i] = sum(p[0:i+1])
uniform_samples = np.random.uniform(0,1, N_samples) # this is the step that uses numpy
index = np.searchsorted(cdf, uniform_samples, side = 'right') # write our own searchsorted?
output = [a[i] for i in index]
else: # sample without replacement
if N_samples>len(a):
raise ValueError("N_samples must be less than length of array for sampling without replacement")
n = 0
output = [None] * N_samples
while n < N_samples:
cdf = np.zeros(np.size(p))
for i in range(len(a)):
cdf[i] = sum(p[0:i+1])
cdf /= cdf[-1]
uniform_sample = np.random.uniform(0,1) # this is the step that uses numpy
index = np.searchsorted(cdf, uniform_sample, side = 'right') # write our own searchsorted?
print('\na: ', a)
print('p: ', p)
print('cdf: ', cdf)
print('rng: ', uniform_sample)
print('index: ', index)
output[n] = a[index]
# without replacement
del a[index]
del p[index]
n = n+1
return(output)
In [12]:
weighted_sample([4, 2, 18, 3], 4, [0.1, 0.3, 0.2, 0.4], replace=False)
Out[12]:
In [6]:
dim = 10
N_samples = 10**(6)
p = np.random.uniform(0,1,dim)
p /= sum(p)
samples = weighted_sample(range(dim),N_samples, p)
a_samples = np.array(samples)
sample_prop = np.zeros(dim)
for i in range(dim):
sample_prop[i] = sum(a_samples==i)
sample_prop = sample_prop/N_samples
print(sample_prop)
print(p)
In [ ]:
In [ ]:
In [ ]: