In [17]:
import argparse
import numpy as np
import matplotlib as mpl

import matplotlib.pyplot as plt
from BayesianBlocks import bayesian_blocks
import MySQLdb
import datetime as dt
%matplotlib inline

In [10]:
def avg_rating(rating):
    avg = [0]*len(rating)
    avg[0] = float(rating[0])
    for k in range(1, len(rating)):
        avg[k]= float(np.mean(rating[:k]))
    return avg

In [11]:
def cuts_out_repeats(x, y):
    hold = x[0]
    xnew = []
    ynew = []
    for i in range(1,len(x)):
        if x[i] == hold:
            pass
        else:
            xnew.append(x[i]/1000)
            ynew.append(y[i])
            hold = x[i]
    return xnew, ynew

In [12]:
def get_data(PID, cursor, tablename):
    sql = "Select RTime, RScore From " +tablename + " Where PID = " + '"' + PID +'";'
    cursor.execute(sql)
    data = cursor.fetchall()
    data = sorted(data)
    rating = np.array(zip(*data)[1], dtype = int)
    time = np.array(zip(*data)[0], dtype = float)
    dates=[dt.datetime.fromtimestamp(ts) for ts in time]
    return rating, time, dates

In [13]:
PIDlist = [' B0000X7CMQ', ' B00005AQ9Q', ' B000GTR2F6', ' B000AQSMPO', ' B00005MF9C', ' B0000E2PEI', ' B0006SFFAQ', ' B00005AQ9Q', ' B00005R19P', ' B000FFQ554', ' B0006ZUHR0']

In [14]:
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
cursor = db.cursor()

In [15]:
tablename = 'all_hk'
pid1 = PIDlist[0]
pid2 = PIDlist[1]
pid3 = PIDlist[2]

In [18]:
r1, t1, d1 = get_data(pid1, cursor, tablename)
r2, t2, d2 = get_data(pid2, cursor, tablename)
r3, t3, d3 = get_data(pid3, cursor, tablename)

In [24]:
x = np.array(t1)
y = np.array(r1)
xnew, ynew = cuts_out_repeats(x, y)
x = xnew
y = ynew

modulo = len(x)%5

x = x[:len(x)-modulo]
y = y[:len(y)-modulo]

x = np.array(x)
y = np.array(y)

print len(x), '   ', x[:10]
print len(y), '   ', y[:10]
#sampleSize = 100
windowSize = len(x)/5


#gauss = lambda x,u,s: np.exp(-0.5*((x-u)/s)**2)

#np.random.seed(1)
#x = np.arange(1, sampleSize+1, dtype=float)
#y = np.round(10 + 2*np.sin(x/250) + 8*gauss(x, 700, 15) + 
             #15*gauss(x, 760, 30) +
             #2*np.random.randn(sampleSize))

# Sliding-window Bayesian blocks
# 1) Given a window size, find edges
# 2) Slide window to second edge, which is either a changepoint or the end of
#    the first window
# 3) End when reaching the end of the data stream
n = len(y)
xm = []
ym = []
i = 0
while True:
    j = min(i + windowSize, n)
    e = bayesian_blocks(x[i:j], y[i:j], p0=1e-4)
    print(e)

    xm += [e[0], e[1]]
    select = np.logical_and(x>=e[0], x<=e[1])
    #print selct
    yavg = np.average(y[select])
    ym += [yavg, yavg]
    i = np.searchsorted(x, e[1])

    if j == n and len(e) == 2:
        break


915     [ 1081382.4  1088208.   1091491.2  1092355.2  1093996.8  1095120.
  1096675.2  1096848.   1097193.6  1098230.4]
915     [5 5 5 5 4 5 5 5 5 5]
[ 1081382.4  1095897.6  1141862.4  1166832.   1168819.2  1190332.8]
[ 1096675.2  1101513.6  1102291.2  1139140.8  1141257.6  1166832.
  1168819.2  1190246.4  1192320. ]
[ 1101600.   1102291.2  1139140.8  1141257.6  1166832.   1168819.2
  1198627.2]
[ 1102377.6  1139140.8  1141257.6  1166832.   1168819.2  1199923.2]
[ 1139270.4  1141257.6  1166832.   1168819.2  1198843.2  1217203.2
  1230854.4]
[ 1141430.4  1166832.   1168819.2  1198843.2  1217203.2  1233273.6]
[ 1167004.8  1168819.2  1198843.2  1217203.2  1230940.8  1239321.6]
[ 1168905.6  1198843.2  1217203.2  1230940.8  1241395.2]
[ 1199059.2  1217203.2  1230940.8  1253232. ]
[ 1217289.6  1230940.8  1263686.4]
[ 1231027.2  1267056. ]
[ 1267056.   1301961.6]
[ 1301961.6  1342310.4]
[ 1342310.4  1355054.4  1361232. ]
[ 1355097.6  1361232. ]

In [25]:
# Full-sample Bayesian blocks
edges = bayesian_blocks(x, y, p0=1e-4)
yavg = []
for i in range(len(edges)-1):
    select = np.logical_and(x >= edges[i], x <= edges[i+1])
    if np.any(select):
        yavg.append(np.average(y[select]))
yavg.append(yavg[-1])

In [26]:
mpl.rc("font", family="serif", size=14)

fig = plt.figure(1, figsize=(10,5))
ax = fig.add_subplot(111)
ax.plot(x, y, 'o', alpha=0.5, label="data")
ax.plot(edges[:len(yavg)], yavg, lw=2, drawstyle="steps-post", color="black",
        alpha=0.7, label="Bayesian Blocks (full sample)")
ax.plot(xm, ym, lw=2, drawstyle="steps-post", color="red",
        alpha=0.7, label="Bayesian Blocks (sliding window: %d)" % windowSize)
ax.set_ylim([0, 1.5*max(y)])
ax.set_xlabel(r"$x$")
ax.set_ylabel(r"$y$")

h, l = ax.get_legend_handles_labels()
leg = ax.legend(h, l, loc=1, prop={"size":10})
leg.get_frame().set_linewidth(0)

#fig.tight_layout()

plt.show()



In [27]:
avg = avg_rating(y)

In [28]:
plt.scatter(x, avg)


Out[28]:
<matplotlib.collections.PathCollection at 0x10543f110>

In [ ]: