In [11]:
## implement weighted random sampling
import numpy as np 
# only function needed is "random_sample" ... replace with sklearn version?

def weighted_sample(a, N_samples, p, replace=True): 
    # a is array from which we are sampliing
    # N_samples is the number of samples
    # p is the vector of probabilities
    
    if len(a) != len(p): 
        raise ValueError("probability vector must have the same length as array")
    if np.abs(sum(p) - 1) >= 10**(-15):
        raise ValueError("probabilities must sum to 1")

    
    if replace: # sample with replacement
        cdf = np.zeros(np.size(p))
        for i in range(len(a)): 
            cdf[i] = sum(p[0:i+1])
        
        uniform_samples = np.random.uniform(0,1, N_samples) # this is the step that uses numpy
        
        index = np.searchsorted(cdf, uniform_samples, side = 'right') # write our own searchsorted?
        output = [a[i] for i in index]

    else: # sample without replacement
        if N_samples>len(a): 
            raise ValueError("N_samples must be less than length of array for sampling without replacement")
        
        n = 0
        output = [None] * N_samples
        while n < N_samples: 
        
            cdf = np.zeros(np.size(p))
            for i in range(len(a)): 
                cdf[i] = sum(p[0:i+1])
            cdf /= cdf[-1]
            
            uniform_sample = np.random.uniform(0,1) # this is the step that uses numpy
            index = np.searchsorted(cdf, uniform_sample, side = 'right') # write our own searchsorted?
            
            print('\na: ', a)
            print('p: ', p)
            print('cdf: ', cdf)
            print('rng: ', uniform_sample)
            print('index: ', index)
            
            output[n] = a[index]

            # without replacement
            del a[index]
            del p[index]
            
            n = n+1
        
    return(output)

In [12]:
weighted_sample([4, 2, 18, 3], 4, [0.1, 0.3, 0.2, 0.4], replace=False)


a:  [4, 2, 18, 3]
p:  [0.1, 0.3, 0.2, 0.4]
cdf:  [ 0.1  0.4  0.6  1. ]
rng:  0.8826538786088812
index:  3

a:  [4, 2, 18]
p:  [0.1, 0.3, 0.2]
cdf:  [ 0.16666667  0.66666667  1.        ]
rng:  0.27935630809250067
index:  1

a:  [4, 18]
p:  [0.1, 0.2]
cdf:  [ 0.33333333  1.        ]
rng:  0.07513796195365241
index:  0

a:  [18]
p:  [0.2]
cdf:  [ 1.]
rng:  0.7242457516458572
index:  0
Out[12]:
[3, 2, 4, 18]

In [6]:
dim = 10
N_samples = 10**(6)
p = np.random.uniform(0,1,dim)
p /= sum(p)

samples = weighted_sample(range(dim),N_samples, p)

a_samples = np.array(samples)

sample_prop = np.zeros(dim)
for i in range(dim): 
    sample_prop[i] = sum(a_samples==i)

sample_prop = sample_prop/N_samples

print(sample_prop)
print(p)


[ 0.021478  0.112714  0.040159  0.099587  0.088963  0.150803  0.231457
  0.082905  0.101185  0.070749]
[ 0.02135717  0.11318231  0.0400311   0.09910058  0.08883451  0.15112294
  0.23165852  0.08279344  0.10089532  0.07102412]

In [ ]:


In [ ]:


In [ ]: