In [243]:
import numpy as np
from sklearn.hmm import MultinomialHMM
import MySQLdb
import matplotlib.pyplot as plt
import pylab as pl
In [244]:
# Rescales review times so that the average wait for the next review is about 1.
def normalized(time):
    """Rescale review timestamps to start at 0 with roughly unit spacing.

    Each timestamp is shifted by the first one and multiplied by the review
    rate ``len(time) / (time[-1] - time[0])``.  The last value therefore
    equals ``len(time)``, which makes the mean gap ``n / (n - 1)`` -- close
    to, but not exactly, 1.

    Parameters
    ----------
    time : sequence of numbers
        Review timestamps.  The first and last entries define the span, so
        the sequence is expected to be sorted in ascending order.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``time``.  An empty input gives
        an empty array; a zero span (a single review, or identical first and
        last timestamps) gives all zeros.
    """
    # Work in floats so integer input never hits Python 2 floor division.
    time = np.asarray(time, dtype=float)
    if time.size == 0:
        return np.zeros(0)
    span = time[-1] - time[0]
    if span == 0:
        # The rate is undefined without a time span; dividing anyway would
        # produce inf/nan values.
        return np.zeros(len(time))
    review_rate = len(time) / span
    return (time - time[0]) * review_rate
In [245]:
def GetTrainingSet(PID, tablename, cursor):
    """Fetch one product's reviews as time-ordered rating and time arrays.

    Parameters
    ----------
    PID : str
        Product id matched against the ``PID`` column.
    tablename : str
        Table to query.  It is spliced into the SQL text because identifiers
        cannot be bound as parameters, so it must come from trusted code.
    cursor : DB-API cursor
        Open cursor providing ``execute`` and ``fetchall``.

    Returns
    -------
    rating : numpy.ndarray of int
        ``RScore - 1`` for every review, ordered by ``(RTime, RScore)``.
    time : numpy.ndarray of float
        The matching ``RTime`` values converted to float.

    Both arrays are empty when no row matches ``PID``.
    """
    # Bind PID as a query parameter instead of concatenating it into the
    # statement, so quotes inside the id cannot break or alter the query.
    sql = "Select RTime, RScore From " + tablename + " Where PID = %s;"
    cursor.execute(sql, (PID,))
    rows = sorted(cursor.fetchall())
    if not rows:
        return np.zeros(0, dtype=int), np.zeros(0, dtype=float)
    time = np.array([row[0] for row in rows], dtype=float)
    # Scores are shifted down by one -- presumably to turn 1-based scores
    # into 0-based symbols for the HMM; confirm against the table.
    rating = np.array([row[1] for row in rows], dtype=int) - 1
    # NOTE(review): the earlier version also bucketed normalized(time) into
    # a 1/2 "discrete time" feature that was never returned (it was only
    # referenced in a commented-out np.column_stack); that dead work is
    # omitted here.
    return rating, time
In [246]:
def running_avg(data):
    """Return the cumulative mean of ``data``.

    Element ``k`` of the result is the mean of ``data[0]`` .. ``data[k]``.

    Parameters
    ----------
    data : sequence of numbers
        Values to average, in order.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``data`` (empty for empty input).
    """
    values = np.asarray(data, dtype=float)
    # Running total divided by the number of elements seen so far (1..n).
    return np.cumsum(values) / np.arange(1, len(values) + 1)
In [247]:
# Open the MySQL connection that the GetTrainingSet calls below query through.
# NOTE(review): host, user ("root") and database name are hard-coded and no
# password is passed -- consider reading these from configuration; confirm.
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
# Single cursor shared by all GetTrainingSet calls.
cursor = db.cursor()
In [248]:
# Name of the table that GetTrainingSet selects RTime and RScore from.
tablename = 'all_hk'
In [249]:
# First product id; hard-coded for now, to be supplied from a terminal or a
# web front end later.  It is prefixed with a single space, like the other
# PIDs in this notebook -- presumably matching how the PID column is stored;
# confirm against the table.
PID1 = ' ' + 'B000GXZ2GS'
In [250]:
# Remaining product ids, each written with a single leading space.
(PID2, PID3, PID4,
 PID5, PID6, PID7,
 PID8, PID9, PID10) = (
    ' B0000X7CMQ', ' B000GTR2F6', ' B000AQSMPO',
    ' B00005MF9C', ' B0000E2PEI', ' B0006SFFAQ',
    ' B00005AQ9Q', ' B00005R19P', ' B000FFQ554',
)
In [251]:
# Getting the data: fetch the ratings and review times of every product.
In [252]:
# Query every product in order, then unpack the (ratings, times) pairs into
# the per-product names used by the later cells.
((X1, T1), (X2, T2), (X3, T3), (X4, T4), (X5, T5),
 (X6, T6), (X7, T7), (X8, T8), (X9, T9), (X10, T10)) = [
    GetTrainingSet(pid, tablename, cursor)
    for pid in (PID1, PID2, PID3, PID4, PID5,
                PID6, PID7, PID8, PID9, PID10)
]
In [253]:
# Running average of each product's rating array, in the same order as X1..X10.
R1, R2, R3, R4, R5, R6, R7, R8, R9, R10 = [
    running_avg(ratings)
    for ratings in (X1, X2, X3, X4, X5, X6, X7, X8, X9, X10)
]
In [254]:
# Inspect the running average of the second product's ratings.
# Function-call form of print: same output under Python 2, valid on Python 3.
print(R2)
In [255]:
#### Hidden Markov model: fit on one product's ratings and decode its hidden states
In [256]:
# Number of hidden states in the model -- a guess, not a tuned value.
n_components = 6
In [257]:
# Build the HMM with n_components hidden states; all other settings are left
# at the library defaults.
# NOTE(review): no random seed is passed, so fits may differ between runs --
# confirm whether this estimator version accepts one.
model = MultinomialHMM(n_components )
In [258]:
# Train on the fourth product only; its rating array is passed as a
# one-element list of sequences.
model.fit([X4])
Out[258]:
In [259]:
# Decode hidden-state labels for the fourth product's ratings (the same
# sequence the model was fitted on); the plotting cells compare these labels
# against each state index to mask T4 and R4.
hidden_states = model.predict(X4)
In [260]:
# Scatter the fourth product's running average rating against review time,
# with one series (and one legend entry) per decoded hidden state.
# NOTE(review): plot_date treats x values as matplotlib date numbers --
# confirm that RTime is stored in that unit.
fig = pl.figure()
ax = fig.add_subplot(111)
for state in range(n_components):
    # Boolean mask selecting the reviews decoded as this state.
    in_state = (hidden_states == state)
    state_label = "%dth hidden state" % state
    ax.plot_date(T4[in_state], R4[in_state], 'o', label=state_label)
ax.legend()
In [261]:
# Redraw the per-state scatter on the existing axes.  The axes are cleared
# first: the previous cell already drew these series, and plotting them again
# on top would duplicate every marker and legend entry.
ax.cla()
for i in range(n_components):
    # use a boolean mask to plot the data decoded as state i
    idx = (hidden_states == i)
    ax.plot_date(T4[idx], R4[idx], 'o', label="%dth hidden state" % i)
ax.legend()
In [262]:
# Display the figure built in the plotting cells above.
pl.show()
In [ ]: