notebook.community

Edit and run



In [1]:

    
import numpy as np
import math



In [2]:

    
#### E-M Coin Toss Example as given in the EM tutorial paper by Do and Batzoglou* #### 
# link: http://ai.stanford.edu/~chuongdo/papers/em_tutorial.pdf

def get_mn_log_likelihood(obs,probs):
    """ Return the (log)likelihood of obs, given the probs"""
    # Multinomial Distribution Log PMF
    # ln (pdf)      =             multinomial coeff            *   product of probabilities
    # ln[f(x|n, p)] = [ln(n!) - (ln(x1!)+ln(x2!)+...+ln(xk!))] + [x1*ln(p1)+x2*ln(p2)+...+xk*ln(pk)]     

    multinomial_coeff_denom= 0
    prod_probs = 0
    for x in range(0,len(obs)): # loop through state counts in each observation
        multinomial_coeff_denom = multinomial_coeff_denom + math.log(math.factorial(obs[x]))
        prod_probs = prod_probs + obs[x]*math.log(probs[x])

    multinomial_coeff = math.log(math.factorial(sum(obs))) -  multinomial_coeff_denom
    likelihood = multinomial_coeff + prod_probs
    return likelihood



In [3]:

    
# 1st:  Coin B, {HTTTHHTHTH}, 5H,5T
# 2nd:  Coin A, {HHHHTHHHHH}, 9H,1T
# 3rd:  Coin A, {HTHHHHHTHH}, 8H,2T
# 4th:  Coin B, {HTHTTTHHTT}, 4H,6T
# 5th:  Coin A, {THHHTHHHTH}, 7H,3T
# so, from MLE: pA(heads) = 0.80 and pB(heads)=0.45

# represent the experiments
head_counts = np.array([5,9,8,4,7])
tail_counts = 10-head_counts
experiments = zip(head_counts,tail_counts)
print experiments









    



[(5, 5), (9, 1), (8, 2), (4, 6), (7, 3)]



In [12]:

    
# helpful diagram from the linked PDF
from IPython.display import Image
Image(filename='pics\EM-algo-for coins\pic1.png')









    Out[12]:



In [10]:

    
# initialise the pA(heads) and pB(heads)
pA_heads = np.zeros(100); pA_heads[0] = 0.66
pB_heads = np.zeros(100); pB_heads[0] = 0.33

# E-M begins!
delta = 0.0001  
j = 0 # iteration counter
improvement = float('inf')

while (improvement>delta and j<100-1):
    
    expectation_A = np.zeros((5,2), dtype=float) 
    expectation_B = np.zeros((5,2), dtype=float)
    
    for i in range(0,len(experiments)):
        e = experiments[i] # i'th experiment
        ll_A = get_mn_log_likelihood(e,np.array([pA_heads[j],1-pA_heads[j]])) # loglikelihood of e given coin A
        ll_B = get_mn_log_likelihood(e,np.array([pB_heads[j],1-pB_heads[j]])) # loglikelihood of e given coin B

        weightA = math.exp(ll_A) / ( math.exp(ll_A) + math.exp(ll_B) ) # corresponding weight of A proportional to likelihood of A 
        weightB = math.exp(ll_B) / ( math.exp(ll_A) + math.exp(ll_B) ) # corresponding weight of B proportional to likelihood of B                            

        expectation_A[i] = np.dot(weightA, e) 
        expectation_B[i] = np.dot(weightB, e)
        
#     print np.sum(expectation_A, axis=0)
    
    pA_heads[j+1] = np.sum(expectation_A,0)[0] / np.sum(expectation_A); 
    pB_heads[j+1] = np.sum(expectation_B,0)[0] / np.sum(expectation_B); 
    
    improvement = max( abs(np.array([pA_heads[j+1],pB_heads[j+1]]) - np.array([pA_heads[j],pB_heads[j]]) ))
    j = j+1
    
#     print j, improvement
    
    
# results
print "pA_heads: " ,  [x for x in pA_heads if x!=0]
print "pB_heads: " ,  [x for x in pB_heads if x!=0]









    



pA_heads:  [0.66000000000000003, 0.73544528225140948, 0.77271624163234898, 0.78748310259262522, 0.79320307527096112, 0.7954073462288398, 0.79625623741975859, 0.7965833658946786, 0.79670955802758114, 0.79675829482449356]
pB_heads:  [0.33000000000000002, 0.4543681075084649, 0.49501581182656579, 0.50978213721687027, 0.51556775622502582, 0.51792295827753299, 0.51889514851249663, 0.5192980251547864, 0.5194650574656231, 0.51953426911855871]



In [7]:

    
final_pA_heads = [x for x in pA_heads if x!=0][-1]
final_pB_heads = [x for x in pB_heads if x!=0][-1]
print "final pA_heads = ", final_pA_heads 
print "final pB_heads = ", final_pB_heads

# compute likelihood of each exp
for exp in experiments:
    print exp
    print "coin A prob: ", math.exp(get_mn_log_likelihood(exp, np.array([final_pA_heads, 1-final_pA_heads])))
    print "coin B prob: ", math.exp(get_mn_log_likelihood(exp, np.array([final_pB_heads, 1-final_pB_heads])))









    



final pA_heads =  0.796781459127
final pB_heads =  0.519531845143
(5, 5)
coin A prob:  0.0280487352213
coin B prob:  0.244221811042
(9, 1)
coin A prob:  0.263036674609
coin B prob:  0.0132487274687
(8, 2)
coin A prob:  0.301892920185
coin B prob:  0.0551364900008
(4, 6)
coin A prob:  0.00596150820246
coin B prob:  0.188215608636
(7, 3)
coin A prob:  0.205326861921
coin B prob:  0.135975380469