In [243]:
import numpy as np
from sklearn.hmm import MultinomialHMM
import MySQLdb
import matplotlib.pyplot as plt
import pylab as pl

In [244]:
# Normalizes the time so that the average wait time for the next review is 1. 
def normalized(time):
    review_rate = len(time)/(time[len(time)-1]-time[0])
    normalized_time = np.zeros(len(time))
    for k in range(len(time)):
        normalized_time[k] = (time[k]-time[0])*review_rate
    return normalized_time

In [245]:
def GetTrainingSet(PID, tablename, cursor):
    sql = "Select RTime, RScore From " +tablename + " Where PID = " + '"' + PID +'";'
    cursor.execute(sql)
    data = cursor.fetchall()
    data = sorted(data)
    rating = np.array(zip(*data)[1], dtype = int)
    time = np.array(zip(*data)[0], dtype = float)
    normalized_time = normalized(time)
    discrete_time = np.zeros(len(time)) 
    for k in range(len(time)):
        rating[k] = rating[k]-1
        if normalized_time[k]> 1:
            discrete_time[k]=2
        else:
            discrete_time[k]=1
    return rating, time #np.column_stack([rating, discrete_time])

In [246]:
def running_avg(data):
    avg = np.zeros(len(data), dtype = float)
    total = 0
    for k in range(len(data)):
        #avg[k] = np.mean(data[:k])
        total += data[k]
        avg[k] = float(total)/float((k+1))
    return avg

In [247]:
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
cursor = db.cursor()

In [248]:
tablename = 'all_hk'

In [249]:
PID1 = 'B000GXZ2GS'  #Later input PIDs from terminal or website or whatever
PID1 = ' ' + PID1

In [250]:
PID2 = ' B0000X7CMQ'
PID3 = ' B000GTR2F6'
PID4 = ' B000AQSMPO'
PID5 = ' B00005MF9C'
PID6 = ' B0000E2PEI'
PID7 = ' B0006SFFAQ'
PID8 = ' B00005AQ9Q'
PID9 = ' B00005R19P'
PID10 = ' B000FFQ554'

In [251]:
#Getting the Data:

In [252]:
X1, T1 = GetTrainingSet(PID1, tablename, cursor)
X2, T2 = GetTrainingSet(PID2, tablename, cursor)
X3, T3 = GetTrainingSet(PID3, tablename, cursor)
X4, T4 = GetTrainingSet(PID4, tablename, cursor)
X5, T5 = GetTrainingSet(PID5, tablename, cursor)
X6, T6 = GetTrainingSet(PID6, tablename, cursor)
X7, T7 = GetTrainingSet(PID7, tablename, cursor)
X8, T8 = GetTrainingSet(PID8, tablename, cursor)
X9, T9 = GetTrainingSet(PID9, tablename, cursor)
X10, T10 = GetTrainingSet(PID10, tablename, cursor)

In [253]:
R1 = running_avg(X1)
R2 = running_avg(X2)
R3 = running_avg(X3)
R4 = running_avg(X4)
R5 = running_avg(X5)
R6 = running_avg(X6)
R7 = running_avg(X7)
R8 = running_avg(X8)
R9 = running_avg(X9)
R10 = running_avg(X10)

In [254]:
print R2


[ 4.          4.          4.         ...,  2.66234888  2.66091458
  2.66206897]

In [255]:
#### HMM Time

In [256]:
n_components =6 #? number of states in the model. I'm just guessing here.

In [257]:
model = MultinomialHMM(n_components )

In [258]:
model.fit([X4])


Out[258]:
MultinomialHMM(algorithm='viterbi',
        init_params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        n_components=6, n_iter=10,
        params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        random_state=<mtrand.RandomState object at 0x10e140690>,
        startprob=None, startprob_prior=1.0, thresh=0.01, transmat=None,
        transmat_prior=1.0)

In [259]:
hidden_states = model.predict(X4)

In [260]:
fig = pl.figure()

ax = fig.add_subplot(111)
for i in range(n_components):
    # use fancy indexing to plot data in each state
    idx = (hidden_states == i)
    ax.plot_date(T4[idx], R4[idx], 'o', label="%dth hidden state" % i)
    ax.legend()


<matplotlib.figure.Figure at 0x115fda550>

In [261]:
for i in range(n_components):
    # use fancy indexing to plot data in each state
    idx = (hidden_states == i)
    ax.plot_date(T4[idx], R4[idx], 'o', label="%dth hidden state" % i)
    ax.legend()

In [262]:
pl.show()

In [ ]: