Content and Objectives

  • Confirm theoretical values for variations and combinations
  • Large number of tuples are sampled and occurrences of according events are being counted

Import


In [1]:
# importing
import numpy as np
from scipy import special

import time

Parameters


In [2]:
# parameters of the combinatoric model
N = 7
K = 4

# number of trials to be simulated
N_trials = int( 1e4 )

In [3]:
start = time.time()

Function for Sampling with Parametrization


In [4]:
def count_samples( N, K, N_trials, respect_order, return_elements ):
    '''
    Function generating samples and counting their number 
    
    IN: N = size of set out of which elements are sampled
        K = size of sample 
        N_trials = number of trials for simulation
        
        respect_order = order of samples mattering (or not), 
                        resulting in variations or combinations, respectively; 
                        boolean
                        
        return_elements =   returning sampled elements to box (or not); 
                            boolean
        
    OUT: numbers of different samples
    '''
    
    # check that sample size is feasible
    assert return_elements == 1 or K <= N, 'Sample has to be feasible!'
    
    # empty list for collecting samples
    collected = [] 
    
    # loop for realizations
    for _n in range( N_trials ):

        # sample subset
        sample = list( np.random.choice( N, K, replace = return_elements ) )

        # sort sample if required
        if not respect_order:
            sample.sort()
        
        # add sample to history if not observed yet
        if sample in collected:
            continue
        else:
            collected.append( sample )   
            
    return len( collected )

Simulations


In [5]:
print('\nSample with order, returning elements:')
print('---------------------------------------------\n')

# get values
theo = N**K 
sim = count_samples( N, K, N_trials, respect_order = 1, return_elements = 1 )

print('Theoretical value:\t\t\t{}'.format( theo )  )
print('Different tuples in simulation: \t{}'.format( sim ) )


Sample with order, returning elements:
---------------------------------------------

Theoretical value:			2401
Different tuples in simulation: 	2372

In [6]:
print('\nSample with order, not returning elements:')
print('---------------------------------------------\n')

# get values
# NOTE: upper limit not included in arange -> increase by 1
theo = np.prod( np.arange( N-K+1, N+1 ) )
sim = count_samples( N, K, N_trials, respect_order = 1, return_elements = 0 )

print('Theoretical value:\t\t\t{}'.format( theo )  )
print('Different tuples in simulation: \t{}'.format( sim ) )


Sample with order, not returning elements:
---------------------------------------------

Theoretical value:			840
Different tuples in simulation: 	840

In [7]:
print('\nSample without order, not returning elements:')
print('---------------------------------------------\n')
     
# get values
theo = special.binom( N, K ).astype( int )
sim = count_samples( N, K, N_trials, respect_order = 0, return_elements = 0 )

print('Theoretical value:\t\t\t{}'.format( theo )  )
print('Different tuples in simulation: \t{}'.format( sim ) )


Sample without order, not returning elements:
---------------------------------------------

Theoretical value:			35
Different tuples in simulation: 	35

In [8]:
print('\nSample without order, returning elements:')
print('---------------------------------------------\n')
        
# get values
theo = special.binom( N+K-1, K ).astype( int )
sim = count_samples( N, K, N_trials, respect_order = 0, return_elements = 1 )

print('Theoretical value:\t\t\t{}'.format( theo )  )
print('Different tuples in simulation: \t{}'.format( sim ) )


Sample without order, returning elements:
---------------------------------------------

Theoretical value:			210
Different tuples in simulation: 	210

Exercise

Quiz: Can you reason how to speed-up simulation while maintaining accuracy?

Show Time for Simulation


In [9]:
print('Elapsed: {}'.format( time.time() - start ))


Elapsed: 1.9693851470947266