In [350]:
# TODO: fit() takes a list of observation sequences, so fit the HMM on several products' data sets rather than just one.
In [3]:
import numpy as np
from sklearn.hmm import GaussianHMM
from scipy import stats
In [4]:
# Open 'temp' for writing (mode 'w' truncates any existing file). The handle is
# kept in `f`; later cells write the timestamps and ratings to it and close it.
f = open('temp', 'w')
In [5]:
import datetime as dt
In [6]:
import matplotlib.pyplot as plt
In [7]:
import MySQLdb
In [8]:
%matplotlib inline
In [9]:
def time_data_clean(time_data):
    """Split (rating, time) records into two parallel lists.

    Parameters
    ----------
    time_data : iterable
        Records that can be indexed; index 0 is read as the rating and
        index 1 as the time.

    Returns
    -------
    tuple of (list, list)
        ``(rating, time)`` in input order. Empty input yields two empty lists.
    """
    rating, time = [], []
    # A single pass, so one-shot iterables (generators, cursors) work as well
    # as sequences; no throwaway pre-allocated lists are needed.
    for record in time_data:
        rating.append(record[0])
        time.append(record[1])
    return rating, time
In [10]:
def time_since_last(time):
    """Return the gap between each timestamp and the one before it.

    Parameters
    ----------
    time : sequence of numbers
        Timestamps, in the order they should be differenced.

    Returns
    -------
    numpy.ndarray of float
        Same length as ``time``. Entry ``i`` (``i >= 1``) is
        ``time[i] - time[i-1]``, with a gap of exactly 0 replaced by 1.
        Entry 0 has no predecessor, so it copies entry 1. With fewer than two
        timestamps there is no gap to compute and the result is all zeros
        (an empty array for empty input).
    """
    count = len(time)
    gaps = np.zeros(count)
    if count < 2:
        # No consecutive pair exists; indexing gaps[1] would raise IndexError.
        return gaps

    deltas = np.diff(np.asarray(time))
    deltas[deltas == 0] = 1  # identical consecutive timestamps count as a gap of 1
    gaps[1:] = deltas
    gaps[0] = gaps[1]
    return gaps
In [11]:
def get_data(PID, cursor, tablename):
    """Fetch one product's ratings and rating times, sorted by time.

    Parameters
    ----------
    PID : str
        Product identifier matched against the ``PID`` column.
    cursor : DB-API cursor
        Open cursor that accepts ``%s`` placeholders (the MySQLdb style).
    tablename : str
        Table to read ``RTime`` and ``RScore`` from. It is spliced into the
        SQL text because identifiers cannot be bound as parameters, so it
        must come from trusted code, never from user input.

    Returns
    -------
    rating : numpy.ndarray of int
        ``RScore`` values.
    time : numpy.ndarray of float
        ``RTime`` values.
    dates : list of datetime.datetime
        ``time`` converted with ``datetime.fromtimestamp``.

    All three are empty when no row matches ``PID``.
    """
    # Bind the PID as a parameter so quoting and escaping are left to the driver.
    sql = "Select RTime, RScore From " + tablename + " Where PID = %s;"
    cursor.execute(sql, (PID,))

    # Rows are (RTime, RScore); sorting the tuples orders them by time first.
    rows = sorted(cursor.fetchall())

    # Comprehensions (rather than zip(*rows)[i]) also handle an empty result.
    rating = np.array([row[1] for row in rows], dtype=int)
    time = np.array([row[0] for row in rows], dtype=float)
    dates = [dt.datetime.fromtimestamp(ts) for ts in time]
    return rating, time, dates
In [12]:
def avg_rating(rating):
    """Return the running average of the ratings seen before each position.

    Parameters
    ----------
    rating : sequence of numbers
        Ratings in chronological order.

    Returns
    -------
    list of float
        Same length as ``rating``. Entry 0 is ``rating[0]`` itself; entry
        ``k`` (``k >= 1``) is the mean of ``rating[:k]``, i.e. the first ``k``
        ratings. Empty input yields an empty list.

    NOTE(review): the rating at position ``k`` is not part of its own average,
    so entries 0 and 1 are always equal and the last rating is never used.
    Confirm this "average so far" definition is intended.
    """
    count = len(rating)
    if count == 0:
        # rating[0] below would raise IndexError.
        return []

    values = np.asarray(rating, dtype=float)
    # Prefix sums give every "mean of the first k ratings" in one pass instead
    # of re-averaging a growing slice for each element.
    prior_means = np.cumsum(values[:-1]) / np.arange(1, count)
    return [float(values[0])] + prior_means.tolist()
In [34]:
def rolling_avg_rating(rating, window=40):
    """Return the average of up to ``window`` ratings preceding each position.

    Parameters
    ----------
    rating : sequence of numbers
        Ratings in chronological order.
    window : int, optional
        Maximum number of preceding ratings averaged (default 40).

    Returns
    -------
    list of float
        Same length as ``rating``. Entry 0 is ``rating[0]`` itself; entry
        ``k`` (``k >= 1``) is the mean of ``rating[max(0, k - window):k]``.
        Empty input yields an empty list.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 (the averaged slice would be empty).

    NOTE(review): as with the running average, the rating at position ``k``
    is excluded from its own window. Confirm this is intended.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    if len(rating) == 0:
        # rating[0] below would raise IndexError.
        return []

    limited_avg = [float(rating[0])]
    for k in range(1, len(rating)):
        start = max(0, k - window)  # shorter window until enough history exists
        limited_avg.append(float(np.mean(rating[start:k])))
    return limited_avg
In [14]:
# Connect to the `home_kitchen` database on the local MySQL server as user
# `root`; no password is passed. The connection settings are hard-coded here.
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
In [15]:
# Cursor handed to get_data() for every query against `db`.
cursor = db.cursor()
In [16]:
# Table queried by get_data(); it is concatenated into the SQL text there.
tablename = 'all_hk'
In [17]:
# Candidate product IDs. Every ID is written with a leading space, and
# ' B00005AQ9Q' appears twice (positions 1 and 7).
# NOTE(review): presumably the stored PID values carry the same leading space -- confirm.
PIDlist = [' B0000X7CMQ', ' B00005AQ9Q', ' B000GTR2F6', ' B000AQSMPO', ' B00005MF9C', ' B0000E2PEI', ' B0006SFFAQ', ' B00005AQ9Q', ' B00005R19P', ' B000FFQ554', ' B0006ZUHR0']
In [52]:
# Products to analyse. pid1 is hard-coded (the same ID as PIDlist[5]) in place
# of the earlier PIDlist[0]; pid2 and pid3 are only used by commented-out code.
pid1 = ' B0000E2PEI' # was PIDlist[0]
pid2 = PIDlist[1]
pid3 = PIDlist[3]
In [53]:
# Load ratings (r1), timestamps (t1) and datetimes (d1) for the first product.
r1, t1, d1 = get_data(pid1, cursor, tablename)
# Loading the second and third products is currently disabled:
#r2, t2, d2 = get_data(pid2, cursor, tablename)
#r3, t3, d3 = get_data(pid3, cursor, tablename)
In [54]:
#ratings = [r1, r2, r3]
In [55]:
# Multi-product version (disabled; `ratings` is itself commented out):
#avgs = map(avg_rating, ratings)
#limavs = map(rolling_avg_rating, ratings)
# Single-product version: running average and rolling average of r1.
avg = avg_rating(r1)
limav = rolling_avg_rating(r1)
In [56]:
# Dump the timestamps and then the ratings to the open file `f`. Every value is
# followed by a comma, and four newlines separate the two runs.
f.writelines(str(stamp) + ',' for stamp in t1)
f.write('\n\n\n\n')
f.writelines(str(score) + ',' for score in r1)
In [57]:
# Close the 'temp' dump file so the written values are flushed to disk.
f.close()
In [58]:
# Individual ratings against the date of each rating. Labels and a title make
# the figure readable on its own; the trailing ';' suppresses the text repr.
plt.scatter(d1, r1)
plt.xlabel('date')
plt.ylabel('rating')
plt.title('Ratings over time');
Out[58]:
In [59]:
# Running average (output of avg_rating) against the date of each rating.
# Labels and a title make the figure readable on its own.
plt.scatter(d1, avg)
plt.xlabel('date')
plt.ylabel('running average rating')
plt.title('Running average rating over time');
Out[59]:
In [60]:
# Rolling average (output of rolling_avg_rating) against the date of each
# rating. Labels and a title make the figure readable on its own.
plt.scatter(d1, limav)
plt.xlabel('date')
plt.ylabel('rolling average rating')
plt.title('Rolling average rating over time');
Out[60]:
In [61]:
# Multi-product observation matrices (disabled along with `limavs`):
#Y1 = np.array(limavs[0]).reshape(len(limavs[0]), 1)
#Y2 = np.array(limavs[1]).reshape(len(limavs[1]), 1)
#Y3 = np.array(limavs[2]).reshape(len(limavs[2]), 1)
# Observation matrix for the HMM: the rolling average as a single column,
# one row per rating.
Y1 = np.array(limav).reshape(-1, 1)
In [62]:
# Number of hidden states passed to GaussianHMM.
n_components = 3
In [63]:
# Create the HMM with n_components states; "full" is passed as the second
# positional argument. The model is only constructed here -- fitting and state
# prediction happen in the following cells.
# NOTE(review): no random seed is set, so fitted results may differ between runs -- confirm.
model = GaussianHMM(n_components, "full")
# Earlier draft calls, kept for reference:
#model.fit([X])
# predict the optimal sequence of internal hidden state
#hidden_states = model.predict(X)
In [64]:
# Fit the HMM; fit() receives a list of observation sequences, here just Y1.
model.fit([Y1])
Out[64]:
In [65]:
# Decode a hidden-state label for every row of Y1 with the fitted model.
hidden_states = model.predict(Y1)
In [66]:
# Show the score the fitted model assigns to Y1.
print(model.score(Y1))
In [67]:
#colors = np.concatenate([np.array([6 for i in range(10)]), hidden_states])
In [68]:
#print colors
In [68]:
In [69]:
# Running average coloured by the hidden state decoded for each rating.
# This cell previously plotted avgs[0]; `avg` is the single-product equivalent.
# NOTE(review): the states were decoded from the rolling average (Y1 is built
# from limav) but are drawn over the running average `avg` -- confirm intended.
plt.scatter(d1, avg, c=hidden_states)
plt.xlabel('date')
plt.ylabel('running average rating')
plt.title('Running average rating coloured by HMM hidden state');
Out[69]:
In [541]:
# Display any pending figures.
# NOTE(review): likely redundant, since %matplotlib inline is enabled earlier -- confirm.
plt.show()
In [666]:
# Pair each decoded hidden state with the running average at the same position.
# `avgs` is only assigned in commented-out code, so avgs[0] raised NameError on
# a fresh run; `avg` is the single-product value the other cells already use.
data = list(zip(hidden_states, avg))
In [667]:
# Show the (hidden state, running average) pairs.
print(data)
In [2]:
# Look up the distinct product title(s) stored in all_hk for one product ID.
PID = " B0000X7CMQ"
# The SQL is assembled by string concatenation, with the ID wrapped in double quotes.
sql = "Select distinct PTitle from all_hk where PID = " + '"' + PID + '";'
# query_db is not defined in the visible part of this notebook.
# NOTE(review): str() turns the query result into one string, so the
# comprehension below walks that string character by character and x[0] is
# just each character. Presumably the returned rows were meant to be iterated
# directly -- confirm against query_db's return value.
prodname = str(query_db(sql))
prodname = tuple(x[0] for x in prodname)
In [ ]: