``````

In [5]:

# Importing the simple data sample

simple = ssr.SimpleSequence()

``````
``````

In [6]:

print "train\n", simple.train
print "test\n", simple.test

print "x"
for sequence in simple.train.seq_list:
print sequence.x

print "y"
for sequence in simple.train.seq_list:
print sequence.y

``````
``````

train
[walk/rainy walk/sunny shop/sunny clean/sunny , walk/rainy walk/rainy shop/rainy clean/sunny , walk/sunny shop/sunny shop/sunny clean/sunny ]
test
[walk/rainy walk/sunny shop/sunny clean/sunny , clean/sunny walk/sunny tennis/sunny walk/sunny ]
x
[0, 0, 1, 2]
[0, 0, 1, 2]
[0, 1, 1, 2]
y
[0, 1, 1, 1]
[0, 0, 0, 1]
[1, 1, 1, 1]

``````
``````

In [7]:

import lxmls.sequences.hmm as hmmc

# Supervised training
hmm = hmmc.HMM(simple.x_dict, simple.y_dict)
hmm.train_supervised(simple.train)

``````
``````

In [8]:

print "Initial Probabilities:\n", hmm.initial_probs
print "Transition Probabilities:\n", hmm.transition_probs
print "Final Probabilities:\n", hmm.final_probs
print "Emission Probabilities:\n", hmm.emission_probs

``````
``````

Initial Probabilities:
[ 0.66666667  0.33333333]
Transition Probabilities:
[[ 0.5    0.   ]
[ 0.5    0.625]]
Final Probabilities:
[ 0.     0.375]
Emission Probabilities:
[[ 0.75   0.25 ]
[ 0.25   0.375]
[ 0.     0.375]
[ 0.     0.   ]]

``````
``````

In [27]:

# Calculate the Trellis Score (Log Probabilities) for the first sequence
initial_scores, transition_scores, final_scores, emission_scores = hmm.compute_scores(simple.train.seq_list[0])

``````
``````

In [10]:

print "initial scores:\n", initial_scores
print "transition_scores:\n", transition_scores
print "final_scores:\n", final_scores
print "emission_scores:\n", emission_scores

``````
``````

initial scores:
[-0.40546511 -1.09861229]
transition_scores:
[[[-0.69314718        -inf]
[-0.69314718 -0.47000363]]

[[-0.69314718        -inf]
[-0.69314718 -0.47000363]]

[[-0.69314718        -inf]
[-0.69314718 -0.47000363]]]
final_scores:
[       -inf -0.98082925]
emission_scores:
[[-0.28768207 -1.38629436]
[-0.28768207 -1.38629436]
[-1.38629436 -0.98082925]
[       -inf -0.98082925]]

``````
``````

In [11]:

# Log Sum
from lxmls.sequences.log_domain import *

``````
``````

In [12]:

# Calculating the Log Likelihood using the Forward Algorithm
log_likelihood, forward = hmm.decoder.run_forward(initial_scores, transition_scores, final_scores, emission_scores)
print 'Log-Likelihood =', log_likelihood
log_likelihood, backward = hmm.decoder.run_backward(initial_scores, transition_scores, final_scores, emission_scores)
print 'Log-Likelihood =', log_likelihood

``````
``````

Log-Likelihood = -5.06823232601
Log-Likelihood = -5.06823232601

``````
``````

In [16]:

# Computing the node posteriors for the first sample sequence
initial_scores, transition_scores, final_scores, emission_scores = hmm.compute_scores(simple.train.seq_list[0])
state_posteriors, _, _ = hmm.compute_posteriors(initial_scores, transition_scores, final_scores, emission_scores)

print state_posteriors

``````
``````

[[ 0.95738152  0.04261848]
[ 0.75281282  0.24718718]
[ 0.26184794  0.73815206]
[ 0.          1.        ]]

``````
``````

In [24]:

y_pred = hmm.posterior_decode(simple.test.seq_list[0])
print "Prediction test 0:", y_pred
print "Truth 0:", simple.test.seq_list[0]

``````
``````

Prediction test 0: walk/rainy walk/rainy shop/sunny clean/sunny
Truth 0: walk/rainy walk/sunny shop/sunny clean/sunny

``````
``````

In [28]:

# This will cause problems because tennis was never seen!
y_pred = hmm.posterior_decode(simple.test.seq_list[1])
print "Prediction test 1:", y_pred
print "Truth 1:", simple.test.seq_list[1]

``````
``````

Prediction test 1: clean/sunny walk/sunny tennis/sunny walk/sunny
Truth 1: clean/sunny walk/sunny tennis/sunny walk/sunny

``````
``````

In [26]:

# So we will smooth instead
hmm.train_supervised(simple.train, smoothing=0.1)
y_pred = hmm.posterior_decode(simple.test.seq_list[0])
print "Prediction test 0 with smoothing:", y_pred
print "Truth test 0:", simple.test.seq_list[0]

y_pred = hmm.posterior_decode(simple.test.seq_list[1])
print "Prediction test 1 with smoothing:", y_pred
print "Truth test 1:", simple.test.seq_list[1]

``````
``````

Prediction test 0 with smoothing: walk/rainy walk/rainy shop/sunny clean/sunny
Truth test 0: walk/rainy walk/sunny shop/sunny clean/sunny
Prediction test 1 with smoothing: clean/sunny walk/sunny tennis/sunny walk/sunny
Truth test 1: clean/sunny walk/sunny tennis/sunny walk/sunny

``````
``````

In [29]:

y_pred, score = hmm.viterbi_decode(simple.test.seq_list[0])
print "Viterbi decoding Prediction test 0 with smoothing"
print y_pred, score
print "Truth test 0"
print simple.test.seq_list[0]

y_pred, score = hmm.viterbi_decode(simple.test.seq_list[1])
print "Viterbi decoding Prediction test 1 with smoothing"
print y_pred, score
print "Truth test 1"
print simple.test.seq_list[1]

``````
``````

Viterbi decoding Prediction test 0 with smoothing
walk/rainy walk/rainy shop/sunny clean/sunny  -6.02050124698
Truth test 0
walk/rainy walk/sunny shop/sunny clean/sunny
Viterbi decoding Prediction test 1 with smoothing
clean/sunny walk/sunny tennis/sunny walk/sunny  -11.713974074
Truth test 1
clean/sunny walk/sunny tennis/sunny walk/sunny

``````
``````

In [32]:

import lxmls.sequences.confusion_matrix as cm

corpus = pcc.PostagCorpus()
hmm = hmmc.HMM(corpus.word_dict, corpus.tag_dict)
hmm.train_supervised(train_seq)
hmm.print_transition_matrix()

viterbi_pred_train = hmm.viterbi_decode_corpus(train_seq)
posterior_pred_train = hmm.posterior_decode_corpus(train_seq)
eval_viterbi_train =   hmm.evaluate_corpus(train_seq, viterbi_pred_train)
eval_posterior_train = hmm.evaluate_corpus(train_seq, posterior_pred_train)
print "Train Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"%(eval_posterior_train,eval_viterbi_train)

viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test =   hmm.evaluate_corpus(test_seq,viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq,posterior_pred_test)
print "Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"%(eval_posterior_test,eval_viterbi_test)

best_smothing = hmm.pick_best_smoothing(train_seq, dev_seq, [10,1,0.1,0])

hmm.train_supervised(train_seq, smoothing=best_smothing)
viterbi_pred_test = hmm.viterbi_decode_corpus(test_seq)
posterior_pred_test = hmm.posterior_decode_corpus(test_seq)
eval_viterbi_test =   hmm.evaluate_corpus(test_seq, viterbi_pred_test)
eval_posterior_test = hmm.evaluate_corpus(test_seq, posterior_pred_test)
print "Best Smoothing %f --  Test Set Accuracy: Posterior Decode %.3f, Viterbi Decode: %.3f"%(best_smothing,eval_posterior_test,eval_viterbi_test)

confusion_matrix = cm.build_confusion_matrix(test_seq.seq_list, viterbi_pred_test,
len(corpus.tag_dict), hmm.get_num_states())

cm.plot_confusion_bar_graph(confusion_matrix, corpus.tag_dict,
range(hmm.get_num_states()), 'Confusion matrix')

``````
``````

Train Set Accuracy: Posterior Decode 0.985, Viterbi Decode: 0.985
Test Set Accuracy: Posterior Decode 0.350, Viterbi Decode: 0.509
Smoothing 10.000000 --  Train Set Accuracy: Posterior Decode 0.731, Viterbi Decode: 0.691
Smoothing 10.000000 -- Test Set Accuracy: Posterior Decode 0.712, Viterbi Decode: 0.675
Smoothing 1.000000 --  Train Set Accuracy: Posterior Decode 0.887, Viterbi Decode: 0.865
Smoothing 1.000000 -- Test Set Accuracy: Posterior Decode 0.818, Viterbi Decode: 0.792
Smoothing 0.100000 --  Train Set Accuracy: Posterior Decode 0.968, Viterbi Decode: 0.965
Smoothing 0.100000 -- Test Set Accuracy: Posterior Decode 0.851, Viterbi Decode: 0.842
Smoothing 0.000000 --  Train Set Accuracy: Posterior Decode 0.985, Viterbi Decode: 0.985
Smoothing 0.000000 -- Test Set Accuracy: Posterior Decode 0.370, Viterbi Decode: 0.526
Best Smoothing 0.100000 --  Test Set Accuracy: Posterior Decode 0.837, Viterbi Decode: 0.827

``````
``````

In [ ]:

``````