In [1]:
# importing
import numpy as np
from scipy import stats, special
import matplotlib.pyplot as plt
import matplotlib
# showing figures inline
%matplotlib inline
In [2]:
# plotting options
font = {'size' : 20}
plt.rc('font', **font)
plt.rc('text', usetex=True)
matplotlib.rc('figure', figsize=(18, 6) )
In [3]:
# theoretical values
N = 4
E_SN = N * .5
D2_SN = N / 12
t = np.arange( 0, N, .01 )
f_theo_norm = 1 / np.sqrt( 2*np.pi*D2_SN ) * np.exp( - ( t - E_SN )**2 / 2 / D2_SN )
# simulation
N_trials = int( 1e4 )
# get uniformly distributed values and map according to theorem
X = np.random.rand( N, N_trials )
Z = np.sum( X, axis = 0 )
In [3]:
# plotting
plt.plot( t, f_theo_norm, linewidth=2.0, label='Normal')
plt.hist( Z, 100, density=True, label='$S_4$', alpha=0.75)
plt.xlabel('$x, n$')
plt.ylabel('$f(x), H_{{{}}}(n)$'.format(N_trials))
plt.grid( True )
plt.legend( loc = 'upper right' )
Out[3]:
In [4]:
# number of students and seats, respectively
N_students = 350
N_seats = 333
# probability of participation
p = 0.95
# number of trials for estimating probability
N_trials = int( 1e4 )
In [5]:
# initialize list for number of participants
participants = []
# loop for trials
for _n in range( N_trials ):
# sample if student is participating
# NOTE: By condition "random uniform < p", probability of participation equalling p is being realized
sample = np.random.rand( N_students )
number = np.sum( [ int( s <= p ) for s in sample ] )
participants.append( number )
# find histogram
bins = range( 0, N_students )
p_hist = np.histogram( participants, bins=bins, density=True)
# get cdf and find smallest index such that P > 0.99
# NOTE: Since we are dealing with indices, ...
# solution is always an integer
# +1 is added since indexing starts at 0
cdf = np.cumsum( p_hist[0] )
N_needed = np.min( np.where( cdf > 0.99 ) ) + 1
In [6]:
# printing results
print('Simulation: \t\t\t\t{:2.3f}\n'.format( N_needed ) )
approximation = stats.norm.ppf( 0.99 ) * np.sqrt( N_students * p * (1-p) ) + N_students * p
approximation_wc = stats.norm.ppf( 0.99 ) * np.sqrt( N_students * p * (1-p) ) + N_students * p + 0.5
print('Approximation by CLT: \t\t\t{:2.3f}'.format( approximation ) )
print('Approximation by CLT w. correction: \t{:2.3f}\n'.format( approximation_wc ) )
In [7]:
# initialize counter for fail exam
fail = 0
# loop for trials
for _n in range( N_trials ):
# sample if student is participating
sample = np.random.rand( N_students )
participating = np.sum( [ int( s <= p ) for s in sample ] )
# check if number of participants exceed limit
if participating > N_seats:
fail += 1
In [8]:
# printing results
print('Simulation: \t\t\t\t{:2.3f}'.format( fail / N_trials ) )
theoretical = np.sum( [ special.binom( N_students, k ) * p**k * (1-p)**(N_students-k) for k in range( N_seats + 1, N_students + 1 ) ] )
print('Theoretical value: \t\t\t{:2.3f}\n'.format( theoretical ) )
approx_clt = 1 - stats.norm.cdf( ( N_seats - N_students * p ) / np.sqrt( N_students * p * (1-p ) ) )
print('Approximation by CLT: \t\t\t{:2.3f}'.format( approx_clt ) )
approx_clt_w_corr = 1 - stats.norm.cdf( ( N_seats - N_students * p + 0.5) / np.sqrt( N_students * p * (1-p ) ) )
print('Approximation by CLT w. correction: \t{:2.3f}'.format( approx_clt_w_corr ) )
In [ ]: