In [17]:
import argparse
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from BayesianBlocks import bayesian_blocks
import MySQLdb
import datetime as dt
%matplotlib inline
In [10]:
def avg_rating(rating):
    """Return the running (cumulative) mean of ``rating``.

    Element ``k`` of the result is the mean of ``rating[0] .. rating[k]``
    inclusive.  The previous implementation averaged ``rating[:k]``, which
    left the current rating out and made the first two entries identical.

    Parameters
    ----------
    rating : sequence of numbers (list or 1-D numpy array)

    Returns
    -------
    list of float
        Same length as ``rating``; an empty input yields an empty list.
    """
    values = np.asarray(rating, dtype=float)
    if values.size == 0:
        # Nothing to average; the old code raised IndexError here.
        return []
    # Cumulative sum divided by the running count gives every prefix mean
    # in a single pass instead of re-averaging each prefix from scratch.
    counts = np.arange(1, values.size + 1)
    return (np.cumsum(values) / counts).tolist()
In [11]:
def cuts_out_repeats(x, y):
    """Drop consecutive repeats of ``x`` and rescale the kept values.

    For every run of equal, adjacent ``x`` values only the first sample of
    the run is kept, together with the ``y`` value at the same position.
    Each kept ``x`` is divided by 1000.  The very first sample is kept as
    well; the previous implementation started at index 1 and therefore
    always discarded it.

    Parameters
    ----------
    x : sequence of numbers; only adjacent elements are compared, so
        equal values that are not next to each other are all kept.
    y : sequence indexed in parallel with ``x``.

    Returns
    -------
    (xnew, ynew) : tuple of two lists of equal length.  Both are empty
        when ``x`` is empty.
    """
    xnew = []
    ynew = []
    for i in range(len(x)):
        # Keep the first sample unconditionally, then only values that
        # differ from their immediate predecessor.
        if i == 0 or x[i] != x[i - 1]:
            xnew.append(x[i]/1000)
            ynew.append(y[i])
    return xnew, ynew
In [12]:
def get_data(PID, cursor, tablename):
    """Fetch the (time, score) rows for one product, ordered by time.

    Parameters
    ----------
    PID : str
        Product identifier matched against the ``PID`` column.  It is bound
        as a query parameter, so the driver handles quoting and escaping.
    cursor : DB-API cursor (e.g. from ``MySQLdb``) using the ``%s``
        placeholder style.
    tablename : str
        Table to query.  Table names cannot be bound as parameters, so this
        value is interpolated into the SQL text and must come from trusted
        code, never from user input.

    Returns
    -------
    rating : numpy int array of ``RScore`` values.
    time : numpy float array of ``RTime`` values.
    dates : list of ``datetime`` objects built from ``time`` with
        ``datetime.fromtimestamp`` (i.e. local time).
    All three are empty when the product has no rows.
    """
    sql = "Select RTime, RScore From " + tablename + " Where PID = %s;"
    cursor.execute(sql, (PID,))
    # Rows are (RTime, RScore) tuples, so a plain sort orders them by time.
    data = sorted(cursor.fetchall())
    # Column extraction via comprehensions also works for an empty result
    # set, where zip(*data)[1] would raise IndexError.
    rating = np.array([row[1] for row in data], dtype=int)
    time = np.array([row[0] for row in data], dtype=float)
    dates = [dt.datetime.fromtimestamp(ts) for ts in time]
    return rating, time, dates
In [13]:
# Product IDs to analyse, one per line.
# NOTE(review): every ID carries a leading space -- presumably that is how
# the PID column is stored in the table; confirm before stripping.
# NOTE(review): ' B00005AQ9Q' is listed twice (positions 1 and 7).
PIDlist = [
    ' B0000X7CMQ',
    ' B00005AQ9Q',
    ' B000GTR2F6',
    ' B000AQSMPO',
    ' B00005MF9C',
    ' B0000E2PEI',
    ' B0006SFFAQ',
    ' B00005AQ9Q',
    ' B00005R19P',
    ' B000FFQ554',
    ' B0006ZUHR0',
]
In [14]:
# Open the local MySQL database and keep one cursor for the queries below.
# NOTE(review): connects as "root" with no password argument -- confirm this
# is only ever run against a local development server.
connection_settings = {
    "host": "localhost",
    "user": "root",
    "db": "home_kitchen",
}
db = MySQLdb.connect(**connection_settings)
cursor = db.cursor()
In [15]:
# Table to query and the first three product IDs to compare.
tablename = 'all_hk'
pid1, pid2, pid3 = PIDlist[:3]
In [18]:
# Load (ratings, timestamps, datetimes) for each selected product, in order.
# A generator expression is used so no loop variable is left in the
# notebook namespace.
(r1, t1, d1), (r2, t2, d2), (r3, t3, d3) = (
    get_data(pid, cursor, tablename) for pid in (pid1, pid2, pid3)
)
In [24]:
# Sliding-window Bayesian blocks for the first product (t1 / r1).
#
# Produces xm / ym: flat lists holding, for each detected block, its two
# edges and the block's average rating repeated twice, ready to be drawn as
# a step curve by the plotting cell.

# Start from the first product's timestamps (x) and ratings (y).
x = np.array(t1)
y = np.array(r1)
# cuts_out_repeats returns plain Python lists with repeated timestamps
# removed and the remaining x values divided by 1000.
xnew, ynew = cuts_out_repeats(x, y)
x = xnew
y = ynew
# Trim the tail so the sample count is an exact multiple of 5; the window
# size below is one fifth of the series.
modulo = len(x)%5
x = x[:len(x)-modulo]
y = y[:len(y)-modulo]
# Back to arrays: the boolean masks used in the loop need numpy indexing.
x = np.array(x)
y = np.array(y)
# Python 2 print statements: show the length and the first ten values.
print len(x), ' ', x[:10]
print len(y), ' ', y[:10]
# NOTE(review): relies on Python 2 integer division. On Python 3 this would
# be a float and the slices x[i:j] below would fail.
# NOTE(review): with fewer than 5 samples windowSize is 0 and the arrays are
# empty -- behaviour of bayesian_blocks on empty input is not visible here.
windowSize = len(x)/5
# Disabled synthetic test signal, kept for reference:
#sampleSize = 100
#gauss = lambda x,u,s: np.exp(-0.5*((x-u)/s)**2)
#np.random.seed(1)
#x = np.arange(1, sampleSize+1, dtype=float)
#y = np.round(10 + 2*np.sin(x/250) + 8*gauss(x, 700, 15) +
#15*gauss(x, 760, 30) +
#2*np.random.randn(sampleSize))
# Sliding-window Bayesian blocks
# 1) Given a window size, find edges
# 2) Slide window to second edge, which is either a changepoint or the end of
# the first window
# 3) End when reaching the end of the data stream
n = len(y)
xm = []
ym = []
i = 0
while True:
    # Window covers samples i .. j-1, clipped to the end of the series.
    j = min(i + windowSize, n)
    # presumably e holds the ordered block edges found inside this window;
    # only the first two entries are used below -- confirm against
    # BayesianBlocks.bayesian_blocks.
    e = bayesian_blocks(x[i:j], y[i:j], p0=1e-4)
    print(e)
    # Record the first block of this window as a flat step segment.
    xm += [e[0], e[1]]
    # Mask over the whole series; both edges are inclusive.
    select = np.logical_and(x>=e[0], x<=e[1])
    # print select
    yavg = np.average(y[select])
    ym += [yavg, yavg]
    # Restart the next window at the first sample whose x is >= e[1].
    # NOTE(review): if this index does not move past the previous i the
    # loop would not advance -- confirm bayesian_blocks guarantees progress.
    i = np.searchsorted(x, e[1])
    # Stop once the window has reached the end of the data and only two
    # edges came back for it.
    if j == n and len(e) == 2:
        break
In [25]:
# Full-sample Bayesian blocks
#
# Builds yavg so that yavg[k] is the average of y between edges[k] and
# edges[k+1] (both edges inclusive), with the last value repeated once so
# that yavg lines up one-to-one with edges for a "steps-post" plot.
edges = bayesian_blocks(x, y, p0=1e-4)
yavg = []
for i in range(len(edges)-1):
    select = np.logical_and(x >= edges[i], x <= edges[i+1])
    if np.any(select):
        yavg.append(np.average(y[select]))
    else:
        # Keep one entry per block even when a block selects no points.
        # Skipping it (as before) shifted every later average onto the
        # wrong edge when plotted against edges[:len(yavg)]. NaN makes
        # matplotlib leave a gap for that block instead.
        yavg.append(np.nan)
if yavg:
    # Repeat the final level so the last edge has a value to step to.
    # Guarded because yavg is empty when fewer than two edges come back.
    yavg.append(yavg[-1])
In [26]:
# Overlay the raw points with the two piecewise-constant fits:
# full-sample blocks in black, sliding-window blocks in red.
mpl.rc("font", **{"family": "serif", "size": 14})
fig = plt.figure(1, figsize=(10,5))
ax = fig.add_subplot(111)

# Line properties shared by both step curves.
step_style = dict(lw=2, drawstyle="steps-post", alpha=0.7)

ax.plot(x, y, 'o', alpha=0.5, label="data")
ax.plot(edges[:len(yavg)], yavg, color="black",
        label="Bayesian Blocks (full sample)", **step_style)
sliding_label = "Bayesian Blocks (sliding window: %d)" % windowSize
ax.plot(xm, ym, color="red", label=sliding_label, **step_style)

# Leave headroom above the largest value so the legend does not cover data.
ax.set_ylim([0, 1.5*max(y)])
ax.set_xlabel(r"$x$")
ax.set_ylabel(r"$y$")

# Frameless legend in the upper-right corner (loc=1).
h, l = ax.get_legend_handles_labels()
leg = ax.legend(h, l, loc=1, prop=dict(size=10))
leg.get_frame().set_linewidth(0)
#fig.tight_layout()
plt.show()
In [27]:
# Per-sample average series of y, as computed by avg_rating (defined in an
# earlier cell); one value per element of y.
avg = avg_rating(y)
In [28]:
# Scatter the averaged rating series against x (the de-duplicated,
# rescaled timestamps prepared in the sliding-window cell).
plt.scatter(x, avg)
Out[28]:
In [ ]: