In [1]:
import time
import math
import random
from matplotlib import pyplot
%matplotlib inline
import numpy

Normal Distribution


In [2]:
num_trials = 1000000
random.seed(time.time())
array = numpy.ndarray(shape=(num_trials))

In [3]:
for i in range(0,num_trials):
    x1 = random.random()
    x2 = random.random()
    f1 = math.sqrt(-2*math.log(x1))*math.cos(2*math.pi*x2)
    f2 = math.sqrt(-2*math.log(x1))*math.sin(2*math.pi*x2)
    array[i] = f1

In [4]:
bins = list(range(-5,7))
for i in range(0,len(bins)):
    bins[i] = bins[i] - 0.5

print(bins)


[-5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5]

In [5]:
hist_norm = pyplot.hist(array, bins)
fig = pyplot.gcf


Binomial distribution


In [6]:
number_of_families = 1000000
random.seed(time.time())
trial_array = numpy.ndarray((number_of_families))
n_children_max = 0 
total = 0

In [7]:
for i in range(0,number_of_families):
    n_sons = 0
    n_children = 0 
    while n_sons < 2:
        coin_flip = random.random()
        if coin_flip < 0.5:
            n_sons = n_sons + 1
        n_children = n_children + 1
    trial_array[i] = n_children
    total = total + n_children
    if n_children > n_children_max:
        n_children_max = n_children

In [8]:
bins_ch = list(range(0,n_children_max+1))
hist_children = pyplot.hist(trial_array, bins_ch)
fig_ch = pyplot.gcf()

average = total / number_of_families
print(average)


3.999476

In [9]:
def binomial_dist(p,n,k):
    P = math.factorial(n)/(math.factorial(k)*math.factorial(n-k))*(p)**k*(1-p)**(n-k)
    return P

In [10]:
p = 0.25
n = 4 

total = 0 
for k in range(0,n+1):
    P = binomial_dist(p, n, k)
    string = "k: " + str(k) + "\tP: " + str(P)
    print(string)
    total = total + P
    
print(total)


k: 0	P: 0.31640625
k: 1	P: 0.421875
k: 2	P: 0.2109375
k: 3	P: 0.046875
k: 4	P: 0.00390625
1.0

Fun with Poisson


In [11]:
def poisson_pdf(k, L):
    p = L**k*math.exp(-L)/math.factorial(k)
    return p

In [12]:
num_points = 10000000
average = 50 # lambda 
T = 10
dict_num_lists = dict()

i = 0 
while i < num_points: 
    t = numpy.random.rand()
    n = numpy.random.poisson(t*average)
    if n in dict_num_lists:
        dict_num_lists[n].append(t)
    else:
        dict_num_lists[n] = [t]
    i = i + 1

In [16]:
def find_prob_of_time(n):
    time_list = dict_num_lists[n]
    for time in time_list:
        print(time)

In [18]:
num_time_bins = 100
time_bins = [None]*(num_time_bins+1)
for i in range(0, num_time_bins+1):
    time_bins[i] = i/num_time_bins

In [43]:
Nobs = 25
hist_times = pyplot.hist(dict_num_lists[Nobs], time_bins, normed=True)



In [57]:
Nmax = int(average*1.2)
plot_array = numpy.ndarray((Nmax))

for n in range(0, int(Nmax)):
    time_list = dict_num_lists.get(n, [])
    num_of_times = len(time_list)
    total_time = sum(time_list)
    expected_time = total_time / num_of_times
    plot_array[n] = expected_time

print(plot_array)


[ 0.0199868   0.03998588  0.05989905  0.07994219  0.09998663  0.11994175
  0.140068    0.16003884  0.18029164  0.20015231  0.21987434  0.24031589
  0.25975437  0.27979583  0.29976424  0.32006299  0.33972221  0.36001923
  0.38007283  0.39978621  0.42010299  0.44014082  0.45979308  0.47986176
  0.50054544  0.52013932  0.54033205  0.56016512  0.57948306  0.5997365
  0.61952883  0.63904301  0.6582862   0.67716633  0.69591298  0.71449946
  0.73241964  0.74870515  0.76626143  0.78201317  0.79644882  0.81048572
  0.82374891  0.83545508  0.84705091  0.85760299  0.86738573  0.87591805
  0.88403859  0.89146415  0.89814179  0.90443955  0.91019777  0.91560533
  0.91983776  0.92422639  0.92849908  0.93140516  0.9352155   0.93792875]

In [58]:
pyplot.plot(plot_array)


Out[58]:
[<matplotlib.lines.Line2D at 0x11b1be550>]

In [59]:
plot_array2 = 1. - plot_array
pyplot.plot(plot_array2)


Out[59]:
[<matplotlib.lines.Line2D at 0x1184cea20>]

Problem 2


In [92]:
num_points = 10000000
max_num = 15
num_num_lists = dict()

i = 0 
while i < num_points: 
    lam = numpy.random.rand() * max_num
    N = numpy.random.poisson(lam)
    M = numpy.random.poisson(lam)
    if N in num_num_lists:
        num_num_lists[N].append(M)
    else:
        num_num_lists[N] = [M]
    i = i + 1

In [93]:
num_bins = range(0, max_num+1)
N_seen = 3
hist_nums = pyplot.hist(num_num_lists[N_seen], num_bins, normed=True)



In [75]:
print(num_num_lists[2])


[7]

In [44]:
timeit(numpy.random.poisson(average))
for i in range(0, num_time_bins+1):
        total_time = total_time + time_list[i]


The slowest run took 1668.84 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 3: 652 ns per loop