In [4]:
%matplotlib inline

import random
import numpy as np
import matplotlib.pyplot as plt

In [6]:
# roll a dice n times and return the list
def roll_dice(n):
    """Roll a six-sided die n times.

    Returns a list holding the n outcomes, each an integer from 1 to 6,
    in the order they were drawn from the `random` module.
    """
    return [random.randint(1, 6) for _ in range(n)]

In [7]:
# Average the sample means of 10 experiments of 10 rolls each; the result
# should be close to 3.5, the expected value of a single fair die roll.
v = 0
for i in range(10):
    v += np.mean(roll_dice(10))

# Parenthesised single-argument print gives the same output on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print(v/10.0)


3.43

In [12]:
# Absolute error of one 100-roll sample mean relative to the expected value 3.5
np.abs(np.mean(roll_dice(100)) - 3.5)


Out[12]:
0.14999999999999991

In [89]:
# Law of Large Numbers
def sim_error(n, trials=50):
    """Estimate the mean absolute error of an n-roll sample mean.

    Runs `trials` experiments; each one rolls the die `n` times with
    roll_dice and measures |sample mean - 3.5|, where 3.5 is the expected
    value of a single fair die roll.

    Parameters
    ----------
    n : int
        Number of rolls per experiment.
    trials : int, optional
        Number of experiments to average over. Defaults to 50, the value
        that used to be hard-coded in both the loop and the divisor.

    Returns
    -------
    float
        The absolute deviation averaged over all experiments.

    Raises
    ------
    ValueError
        If `trials` is smaller than 1 (the average would be undefined).
    """
    if trials < 1:
        raise ValueError("trials must be at least 1")

    err = 0
    for _ in range(trials):
        err += np.abs(np.mean(roll_dice(n)) - 3.5)

    # err is a numpy float after the first iteration, so this is a true
    # division on Python 2 as well.
    return err/trials

In [92]:
# Simulated mean absolute error of the sample mean (y) for n = 1..119 rolls,
# plotted against a c/sqrt(n) reference curve (z).
x = range(1,120)
y = [sim_error(i) for i in x]
# NOTE(review): 1.36 looks like an empirically chosen constant. It is close to
# sqrt(2/pi) * sqrt(35/12) ~= 1.363, the large-n mean absolute error of a fair
# die's sample mean under the normal approximation -- confirm the intent.
z = [1.36/np.sqrt(i) for i in x]

# Label both curves and the axes so the figure can be read on its own.
plt.plot(x, y, label="simulated mean absolute error")
plt.plot(x, z, label="1.36 / sqrt(n)")
plt.xlabel("Number of rolls n")
plt.ylabel("Error of the sample mean")
plt.legend()
plt.show()


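It can be shown that the root-mean-square error of the sample mean is $\frac{\sigma(X)}{\sqrt{n}}$; the mean absolute error plotted above is, for large $n$, approximately $\sqrt{\frac{2}{\pi}}\,\frac{\sigma(X)}{\sqrt{n}} \approx \frac{1.36}{\sqrt{n}}$.
For a fair die, $E[X^2] = 91/6$, so $\mathrm{Var}(X) = 91/6 - 3.5^2 = 35/12 \approx 2.9167$.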


In [65]:
# Compare the simulated mean absolute error with the standard error
# sqrt(Var(X)/n) for large n, where Var(X) = 35/12 ~= 2.9167 for a fair die.
# Under the normal approximation the mean absolute error is expected to be
# about sqrt(2/pi) ~= 0.80 times the standard error, not equal to it.
n = 1000
# Parenthesised print works on both Python 2 and Python 3.
print(sim_error(n))
print(np.sqrt(2.9167)/np.sqrt(n))


0.04307
0.0540064810926

Central Limit Theorem - Non-identical distributions

Sum of dice rolls and coin tosses


In [102]:
def toss_coin(n):
    """Toss a coin n times.

    Returns a list holding the n outcomes, each either 0 or 1, in the
    order they were drawn from the `random` module.
    """
    return [random.randint(0, 1) for _ in range(n)]

# Number of coin tosses and of dice rolls in each experiment.
n = 100
h = []
for i in range(1000):
    # The builtin sum() replaces reduce(), which is not a builtin on Python 3.
    # The result is kept in `total` so the builtin `sum` is not shadowed for
    # the rest of the kernel session.
    total = sum(toss_coin(n) + roll_dice(n))
    # Scale by n: each entry is the coin-toss mean plus the dice-roll mean.
    h.append(total/float(n))

plt.hist(h)
plt.title("Gaussian Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()


$E[(\frac{\sum_{i=1}^{n}x_i}{n} - 3.5)^2]$