notebook.community

Edit and run



In [2]:

    
import numpy as np
from sklearn.hmm import MultinomialHMM
import MySQLdb
import matplotlib.pyplot as plt
import pylab as pl
%matplotlib inline



In [3]:

    
# Normalizes the time so that the average wait time for the next review is 1. 
def normalized(time):
    review_rate = len(time)/(time[len(time)-1]-time[0])
    normalized_time = np.zeros(len(time))
    for k in range(len(time)):
        normalized_time[k] = (time[k]-time[0])*review_rate
    return normalized_time



In [4]:

    
def GetTrainingSet(PID, tablename, cursor):
    sql = "Select RTime, RScore From " +tablename + " Where PID = " + '"' + PID +'";'
    cursor.execute(sql)
    data = cursor.fetchall()
    data = sorted(data)
    rating = np.array(zip(*data)[1], dtype = int)
    time = np.array(zip(*data)[0], dtype = float)
    normalized_time = normalized(time)
    normalized_time
    discrete_time = np.zeros(len(time))
    #ratings 1-5, mult by 2 if short wait for a rating,
    data_encoded = np.zeros(len(time))
    #data_encoded.append('temp')
    for k in range(1, len(time)):
        rating[k] = rating[k]-1
        if (normalized_time[k] - normalized_time[k-1])> 1:
            discrete_time[k]=2
            #data_encoded[k] = 
            #data_encoded.append('f' + str(rating[k]))  #ok let's try strings instead
        else:
            discrete_time[k]=1
            #data_encoded.append('s' + str(rating[k]))
        data_encoded[k] = int(rating[k]*2 - discrete_time[k])
    #print discrete_time
    discrete_time[0] = discrete_time[1]
    data_encoded[0] = int(rating[0]*2 - discrete_time[0])
    rating[0]=rating[0]-1
    return data_encoded, rating, time 
    #return np.column_stack([rating, discrete_time]), time



In [5]:

    
def running_avg(data): #now assuming rating is 1 less than actual because sklearn hmm is so dumb
    avg = np.zeros(len(data), dtype = float)
    total = 0
    for k in range(len(data)):
        #avg[k] = np.mean(data[:k])
        total += data[k]+1
        avg[k] = float(total)/float((k+1))
    return avg



In [6]:

    
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
cursor = db.cursor()



In [7]:

    
tablename = 'all_hk'



In [8]:

    
PID1 = 'B000GXZ2GS'  #Later input PIDs from terminal or website or whatever
PID1 = ' ' + PID1



In [9]:

    
PID2 = ' B0000X7CMQ' #zojirushi, it breaks
PID3 = ' B000GTR2F6'
PID4 = ' B000AQSMPO'
PID5 = ' B00005MF9C'
PID6 = ' B0000E2PEI'
PID7 = ' B0006SFFAQ'
PID8 = ' B00005AQ9Q'
PID9 = ' B00005R19P'
PID10 = ' B000FFQ554'
PID11 = ' B0006ZUHR0'



In [10]:

    
#Getting the Data:



In [11]:

    
#X1, T1 = GetTrainingSet(PID1, tablename, cursor)
#print X1, T1
#X2, T2 = GetTrainingSet(PID2, tablename, cursor)
#X3, T3 = GetTrainingSet(PID3, tablename, cursor)
D1, X1, T1 = GetTrainingSet(PID1, tablename, cursor)
D2, X2, T2 = GetTrainingSet(PID2, tablename, cursor)
D3, X3, T3 = GetTrainingSet(PID3, tablename, cursor)
D4, X4, T4 = GetTrainingSet(PID4, tablename, cursor)
D5, X5, T5 = GetTrainingSet(PID5, tablename, cursor)
D6, X6, T6 = GetTrainingSet(PID6, tablename, cursor)
D7, X7, T7 = GetTrainingSet(PID7, tablename, cursor)
D8, X8, T8 = GetTrainingSet(PID8, tablename, cursor)
D9, X9, T9 = GetTrainingSet(PID9, tablename, cursor)
D10, X10, T10 = GetTrainingSet(PID10, tablename, cursor)
D11, X11, T11 = GetTrainingSet(PID11, tablename, cursor)



In [12]:

    
#R1 = np.array(zip(*X1)[0], dtype = int)
#R2 = np.array(zip(*X2)[0], dtype = int)
#R3 = np.array(zip(*X3)[0], dtype = int)



In [13]:

    
R1 = running_avg(X1)
R2 = running_avg(X2)
R3 = running_avg(X3)
R4 = running_avg(X4)
R5 = running_avg(X5)
R6 = running_avg(X6)
R7 = running_avg(X7)
R8 = running_avg(X8)
R9 = running_avg(X9)
R10 = running_avg(X10)
R11 = running_avg(X11)



In [14]:

    
print D1









    



[ 8.  6.  6. ...,  7.  5.  7.]



In [15]:

    
#### HMM Time



In [16]:

    
n_components = 3 #? number of states in the model. I'm just guessing here.



In [39]:

    
model = MultinomialHMM(n_components, n_iter = 10)



In [132]:

    
model.fit([X7])
#model.fit([X1])
#model.fit([X2])
#model.fit([X3])
#model.fit([X4])
#model.fit([X5])
#model.fit([X7])
#model.fit([X8])
#model.fit([X9])









    Out[132]:





MultinomialHMM(algorithm='viterbi',
        init_params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        n_components=3, n_iter=10,
        params='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        random_state=<mtrand.RandomState object at 0x109f466c0>,
        startprob=None, startprob_prior=1.0, thresh=0.01, transmat=None,
        transmat_prior=1.0)



In [133]:

    
hidden_states = model.predict(X6)



In [134]:

    
model_score = model.score(X6)



In [135]:

    
print model_score









    



-717.214791048



In [136]:

    
colorlist = ['r', 'g', 'b']



In [140]:

    
colors= [colorlist[i] for i in hidden_states]



In [141]:

    
print colors









    



['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b']



In [138]:

    
plt.scatter(T6, R6, c = colors)









    Out[138]:





<matplotlib.collections.PathCollection at 0x10f937290>



In [722]:

    
plt.show()



In [ ]: