In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
import random
import scipy.stats as st
from pydnb.dnb import DNB
In [2]:
def change(A, state):
    """Sample the next state of a Markov chain from one row of ``A``.

    Parameters
    ----------
    A : 2-D array or sequence of sequences
        Transition matrix; ``A[i][j]`` is the probability of moving from
        state ``i`` to state ``j``. Each row is expected to sum to 1.
    state : int
        Index of the current state.

    Returns
    -------
    int
        Index of the next state, drawn with probabilities ``A[state]``.
    """
    row = A[state]
    # Inverse-CDF sampling over the whole row. Comparing the draw against
    # max(row) would treat the largest entry as the "stay" probability, which
    # is wrong when the largest entry is off-diagonal (e.g. row 0 == [0.4, 0.6])
    # and cannot express more than two states.
    draw = random.random()
    cumulative = 0.0
    for next_state, prob in enumerate(row):
        cumulative += prob
        if draw < cumulative:
            return next_state
    # Floating-point rounding can leave the running sum a hair below 1.0;
    # in that case the draw belongs to the last state.
    return len(row) - 1
def generate_dataset(A, B, size=1000):
    """Simulate a labelled sequence of hidden states and noisy observations.

    The initial state is drawn uniformly from ``0 .. len(A) - 1``. At every
    step the current state is recorded, one observation is drawn as
    ``B[state][0] + B[state][1] * N(0, 1)``, and the state is then advanced
    with ``change(A, state)``.

    Parameters
    ----------
    A : transition matrix handed to ``change``; ``len(A)`` is the number of states.
    B : per-state ``[offset, scale]`` pairs for the observation noise.
    size : number of time steps to simulate.

    Returns
    -------
    pandas.DataFrame
        Columns ``'state'`` and ``'obs'``, one row per time step.
    """
    current = random.randint(0, len(A) - 1)
    states, observations = [], []
    for _ in range(size):
        offset = B[current][0]
        scale = B[current][1]
        states.append(current)
        observations.append(np.random.randn() * scale + offset)
        current = change(A, current)
    return pd.DataFrame({'state': states, 'obs': observations})
In [3]:
# Seed both random number generators used by generate_dataset (stdlib
# `random` for the state sequence, NumPy for the Gaussian noise) so that a
# fresh "Restart & Run All" regenerates the same df1.
random.seed(0)
np.random.seed(0)

# Parameters of the first synthetic model.
# A: transition matrix; row i is meant to hold the probabilities of moving
#    out of state i.
A = np.array([[0.7, 0.3],
              [0.4, 0.6]])
# B[i] = [offset, scale] applied to standard-normal noise for state i.
B = [[6, 5], [15, 5]]
df1 = generate_dataset(A, B)

# Optional sanity plot of the per-state observation histograms (disabled).
# It now refers to df1; the earlier `df` is not defined at this point.
#plt.hist([df1[df1['state']==1].obs,df1[df1.state==0].obs],stacked=False, color = ['r','g'])
#plt.legend(['state 1','state 0']);
In [4]:
# Fit the first model on the labelled sequence df1: 'state' is the label
# column and 'obs' is described with scipy's normal distribution.
# NOTE(review): `mle` presumably stands for maximum-likelihood estimation —
# confirm against the pydnb documentation.
hmm1 = DNB()
hmm1.mle(
    df1,
    'state',
    features={'obs': st.norm},
);  # the trailing ';' keeps the notebook from echoing the return value
In [5]:
# Render the fitted model's `A` attribute as a DataFrame.
# NOTE(review): presumably the estimated transition matrix, mirroring the `A`
# passed to generate_dataset — confirm against pydnb.
pd.DataFrame(hmm1.A)
Out[5]:
In [6]:
# Render the fitted model's `B` attribute as a DataFrame.
# NOTE(review): presumably the fitted per-state emission parameters —
# confirm against pydnb.
pd.DataFrame(hmm1.B)
Out[6]:
In [8]:
# Spot-check two quantities of the fitted model:
#   1. emission_prob for state 0 evaluated on the third row of df1
#   2. transition_prob for the pair (0, 1)
print(hmm1.emission_prob(0, df1.iloc[2]))
print(hmm1.transition_prob(0, 1))
In [9]:
# Sum the values returned by the model's forward pass over df1.
# NOTE(review): this calls the private `_forward` method, and the meaning of
# k=10 is not visible here — confirm against pydnb before relying on it.
sum(hmm1._forward(df1,k=10))
Out[9]:
In [10]:
# Sample from the fitted model (argument 100) and keep the first element of
# the result; the next cell uses it as a frame with a `state` column.
# NOTE(review): 100 is presumably the sequence length — confirm against pydnb.
df=hmm1.sample(100)[0]
In [11]:
# Score the sampled frame together with its own `state` column under hmm1.
# NOTE(review): whether the result is a probability or a log-probability is
# not visible here — confirm against pydnb.
hmm1.seq_probability(df,df.state)
Out[11]:
In [16]:
# Parameters of the second synthetic model (this rebinds A and B from the
# first model's cell).
# A: transition matrix handed to generate_dataset, one row per state.
A = np.array([
    [0.4, 0.6],
    [0.3, 0.7],
])
# B[i] = [offset, scale] applied to standard-normal noise for state i.
B = [
    [3, 6],
    [8, 4],
]
df2 = generate_dataset(A, B)
In [17]:
# Fit the second model on df2 with the same setup as the first one:
# 'state' is the label column, 'obs' uses scipy's normal distribution.
hmm2 = DNB()
hmm2.mle(
    df2,
    'state',
    features={'obs': st.norm},
);  # the trailing ';' keeps the notebook from echoing the return value
In [19]:
from pydnb.utils import output_sequence_distance

# Distances between the two fitted models in both directions, followed by
# each model against itself. Every call uses seq_len=300 and sequences=10.
dist_hmm1_hmm2 = output_sequence_distance(hmm1, hmm2, seq_len=300, sequences=10)
print('done')  # progress marker after the first comparison
dist_hmm2_hmm1 = output_sequence_distance(hmm2, hmm1, seq_len=300, sequences=10)
dist_hmm1_hmm1 = output_sequence_distance(hmm1, hmm1, seq_len=300, sequences=10)
dist_hmm2_hmm2 = output_sequence_distance(hmm2, hmm2, seq_len=300, sequences=10)

# One line per comparison, in the order they were computed.
print(
    "hmm1-->hmm2: %f\nhmm2-->hmm1: %f\nhmm1-->hmm1: %f\nhmm2-->hmm2: %f"
    % (dist_hmm1_hmm2, dist_hmm2_hmm1, dist_hmm1_hmm1, dist_hmm2_hmm2)
)
In [ ]:
# Echo the raw `B` attribute of the first fitted model (no DataFrame wrapper).
hmm1.B
In [ ]: