notebook.community

Edit and run



In [4]:

    
%matplotlib inline

import random
import numpy as np
import matplotlib.pyplot as plt



In [6]:

    
# Simulate n rolls of a fair six-sided die and return the outcomes as a list
def roll_dice(n):
    """Return a list with n independent draws of random.randint(1, 6)."""
    return [random.randint(1, 6) for _ in range(n)]



In [7]:

    
# Average the sample mean of 10 rolls over 10 repetitions; the result should
# be close to the expected value of a fair die, 3.5.
v = 0
for i in range(10):
    v += np.mean(roll_dice(10))

# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(v/10.0)



In [12]:

    
# Absolute error of the sample mean of 100 rolls relative to the expected value 3.5
sample_mean = np.mean(roll_dice(100))
np.abs(sample_mean - 3.5)









    Out[12]:





0.14999999999999991



In [89]:

    
# Law of Large Numbers
def sim_error(n, trials=50):
    """Estimate the mean absolute error of the sample mean of n dice rolls.

    Runs `trials` independent experiments. Each experiment rolls the die n
    times with roll_dice(n) and measures |sample mean - 3.5|, where 3.5 is
    the expected value of a fair six-sided die.

    n      -- number of rolls per experiment
    trials -- number of experiments to average over (default 50, the value
              that used to be hard-coded)

    Returns the average of the absolute errors over all experiments.
    """
    err = 0
    for _ in range(trials):
        err += np.abs(np.mean(roll_dice(n)) - 3.5)

    # float() keeps this a true division regardless of the Python version
    return err / float(trials)



In [92]:

    
# Compare the simulated mean absolute error with a c/sqrt(n) reference curve.
# 1.36 ~= sqrt(2/pi) * sqrt(35/12): the large-n (normal approximation) value
# of sqrt(n) * E|sample mean - 3.5| for a fair die.
x = list(range(1, 120))
y = [sim_error(i) for i in x]
z = [1.36/np.sqrt(i) for i in x]

# Label both curves and the axes so the figure can be read on its own
plt.plot(x, y, label="simulated mean absolute error")
plt.plot(x, z, label="1.36/sqrt(n)")
plt.xlabel("Number of rolls n")
plt.ylabel("Mean absolute error")
plt.legend()
plt.show()

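It can be shown that the root-mean-square error of the sample mean is $\frac{\sigma(X)}{\sqrt{n}}$; the mean absolute error plotted above approaches $\sqrt{2/\pi}\,\frac{\sigma(X)}{\sqrt{n}} \approx \frac{1.36}{\sqrt{n}}$ for large $n$.
For dice rolling, $E[X^2] = 91/6$, so $Var(X) = 91/6 - 3.5^2 = 35/12 \approx 2.9167$.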



In [65]:

    
# Compare the simulated error for large n with sigma/sqrt(n), using
# Var(X) = 2.9167 for a fair die. sim_error averages the absolute error
# (not the squared error), so for large n the first number is expected to be
# about sqrt(2/pi) ~= 0.80 times the second rather than equal to it.
n = 1000
# print() with a single argument works on both Python 2 and Python 3
print(sim_error(n))
print(np.sqrt(2.9167)/np.sqrt(n))









    



0.04307
0.0540064810926

Central Limit Theorem - Non identical distributions

Sum of dice rolls and coin tosses



In [102]:

    
def toss_coin(n):
    """Return a list with n independent draws of random.randint(0, 1)."""
    return [random.randint(0, 1) for _ in range(n)]

# Each sample adds n coin tosses (0/1) and n dice rolls (1..6) and divides the
# total by n, i.e. it is the coin-toss mean plus the dice-roll mean.
n = 100
h = []
for i in range(1000):
    # Built-in sum() replaces reduce(), which is not a builtin on Python 3;
    # the result is kept in `total` so the name `sum` is not rebound.
    total = sum(toss_coin(n) + roll_dice(n))
    h.append(total / float(n))

plt.hist(h)
plt.title("Gaussian Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

$E[(\frac{\sum_{i=1}^{n}x_i}{n} - 3.5)^2]$