In [1]:
%matplotlib notebook
from __future__ import division
from scipy.stats import binom as binomial
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#from ipywidgets import StaticInteract, RangeWidget
import pandas as pd
from IPython.display import display
from sympy import init_printing, symbols, Eq
init_printing()
In [2]:
coin_toss = []
coin_toss.append('H T T T H H T H T H'.split())
coin_toss.append('H H H H T H H H H H'.split())
coin_toss.append('H T H H H H H T H H'.split())
coin_toss.append('H T H T T T H H T T'.split())
coin_toss.append('T H H H T H H H T H'.split())
In [3]:
df = pd.DataFrame(coin_toss)
In [4]:
df
Out[4]:
In [5]:
thetaA, thetaB = symbols('theta_A theta_B')
a,b = thetaA, thetaB # Hack to display
thetaA = 0.6
thetaB = 0.5
a , thetaA
Out[5]:
In [6]:
b, thetaB
Out[6]:
In [7]:
## Observed Case
observed = ['B', 'A', 'A', 'B', 'A']
index_A = [i for i,x in enumerate(observed) if x=='A']
index_B = [i for i,x in enumerate(observed) if x=='B']
total_tosses = df.size
A_tosses = df.iloc[index_A].unstack()
B_tosses = df.iloc[index_B].unstack()
A_heads = A_tosses.value_counts()['H']
B_heads = B_tosses.value_counts()['H']
theta_A = A_heads/A_tosses.size
theta_B = B_heads/B_tosses.size
In [8]:
(a, theta_A)
Out[8]:
In [9]:
(b, theta_B)
Out[9]:
In [10]:
thetaA = 0.6
thetaB = 0.5
def em(theta_old):
row_prob = []
## Expectation
for row in coin_toss:
count_heads = row.count('H')
p_a = binomial.pmf(count_heads, len(row), theta_old['A'])
p_b = binomial.pmf(count_heads, len(row), theta_old['B'])
p_t = p_a+p_b
p_a = p_a/p_t
p_b = p_b/p_t
row_prob.append({'A': p_a, 'B': p_b, 'count_heads': count_heads})
## Maximisation
new_coin_toss = []
for row in row_prob:
A_heads =
new_coin_tosse.append