In [1]:
# importing
import numpy as np
from scipy import special
import time
In [2]:
# parameters of the combinatoric model
N = 7
K = 4
# number of trials to be simulated
N_trials = int( 1e4 )
In [3]:
start = time.time()
In [4]:
def count_samples( N, K, N_trials, respect_order, return_elements ):
'''
Function generating samples and counting their number
IN: N = size of set out of which elements are sampled
K = size of sample
N_trials = number of trials for simulation
respect_order = order of samples mattering (or not),
resulting in variations or combinations, respectively;
boolean
return_elements = returning sampled elements to box (or not);
boolean
OUT: numbers of different samples
'''
# check that sample size is feasible
assert return_elements == 1 or K <= N, 'Sample has to be feasible!'
# empty list for collecting samples
collected = []
# loop for realizations
for _n in range( N_trials ):
# sample subset
sample = list( np.random.choice( N, K, replace = return_elements ) )
# sort sample if required
if not respect_order:
sample.sort()
# add sample to history if not observed yet
if sample in collected:
continue
else:
collected.append( sample )
return len( collected )
In [5]:
print('\nSample with order, returning elements:')
print('---------------------------------------------\n')
# get values
theo = N**K
sim = count_samples( N, K, N_trials, respect_order = 1, return_elements = 1 )
print('Theoretical value:\t\t\t{}'.format( theo ) )
print('Different tuples in simulation: \t{}'.format( sim ) )
In [6]:
print('\nSample with order, not returning elements:')
print('---------------------------------------------\n')
# get values
# NOTE: upper limit not included in arange -> increase by 1
theo = np.prod( np.arange( N-K+1, N+1 ) )
sim = count_samples( N, K, N_trials, respect_order = 1, return_elements = 0 )
print('Theoretical value:\t\t\t{}'.format( theo ) )
print('Different tuples in simulation: \t{}'.format( sim ) )
In [7]:
print('\nSample without order, not returning elements:')
print('---------------------------------------------\n')
# get values
theo = special.binom( N, K ).astype( int )
sim = count_samples( N, K, N_trials, respect_order = 0, return_elements = 0 )
print('Theoretical value:\t\t\t{}'.format( theo ) )
print('Different tuples in simulation: \t{}'.format( sim ) )
In [8]:
print('\nSample without order, returning elements:')
print('---------------------------------------------\n')
# get values
theo = special.binom( N+K-1, K ).astype( int )
sim = count_samples( N, K, N_trials, respect_order = 0, return_elements = 1 )
print('Theoretical value:\t\t\t{}'.format( theo ) )
print('Different tuples in simulation: \t{}'.format( sim ) )
Quiz: Can you reason how to speed-up simulation while maintaining accuracy?
In [9]:
print('Elapsed: {}'.format( time.time() - start ))