In [82]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate

In [83]:
import MySQLdb

In [84]:
%matplotlib inline

In [149]:
def running_avg(data):
    """Return the cumulative (running) mean of a sequence of numbers.

    Element ``k`` of the result is the mean of ``data[0]`` through
    ``data[k]`` inclusive.

    Parameters
    ----------
    data : iterable of numbers
        Values to average, in the order they should be accumulated.
        May be empty.

    Returns
    -------
    list of float
        One running mean per input value (empty list for empty input).
    """
    avg = []
    total = 0
    for count, value in enumerate(data, 1):
        total += value
        # Convert to float *before* dividing: under Python 2, int / int
        # floors the quotient, which would truncate the mean of integer data.
        avg.append(float(total) / count)
    return avg

In [86]:
# Open a connection to the "home_kitchen" MySQL database on localhost.
# NOTE(review): connects as user "root" and passes no password argument.
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")

In [87]:
# Cursor on the connection above; used to run the query and fetch its rows.
cursor = db.cursor()

In [88]:
# Name of the table the SELECT statement below reads from.
tablename = 'all_hk'

In [307]:
# Product ID to analyse, prefixed with a single space before it is used in
# the query. Presumably PID values are stored in the table with a leading
# space -- TODO confirm against the data.
PID = ' ' + 'B000GTR2F6'

In [308]:
# Query selecting (RTime, RScore) for every row of `tablename` whose PID
# equals the product ID chosen above.
# NOTE(review): the values are interpolated straight into the SQL text. That
# is fine for the hard-coded PID used here, but switch to
# cursor.execute(query, params) if PID ever comes from outside the notebook.
sql = 'Select RTime, RScore From %s Where PID = "%s";' % (tablename, PID)

In [309]:
# Run the query. The call's return value is shown as the cell output
# (presumably the number of matching rows -- confirm against MySQLdb docs).
cursor.execute(sql)


Out[309]:
1432L

In [310]:
# Pull every result row of the executed query into memory; each row holds
# the selected columns in SELECT order, i.e. (RTime, RScore).
data = cursor.fetchall()

In [311]:
# Sort the rows into a list. Assuming each row is an (RTime, RScore) tuple,
# this orders them by RTime, with ties broken by RScore.
data = sorted(data)

In [312]:
# Split the sorted (RTime, RScore) rows into two parallel tuples.
# A single unpack transposes the rows only once and does not depend on zip()
# returning a subscriptable list, which is true only on Python 2.
time, rating = zip(*data)
avg = []
# Pre-allocated; filled in with the normalised timestamps in a later cell.
normal_time = [0]*len(time)

In [313]:
# Rescale every timestamp to "years since a reference instant", using
# 365-day years. The offset 788918400 equals 9131 days * 86400 s, which is
# 1995-01-01 00:00 UTC if RTime is in Unix seconds -- TODO confirm the units.
seconds_per_year = 365*24*60*60
for idx, stamp in enumerate(time):
    normal_time[idx] = (float(stamp) - 788918400) / seconds_per_year

In [314]:
# Running mean of the ratings, accumulated in the sorted row order.
avg = running_avg(rating)

In [315]:
# Scatter plot of the running-average rating against the normalised time.
plt.scatter(normal_time, avg)


Out[315]:
<matplotlib.collections.PathCollection at 0x10da845d0>

In [316]:
# Convert the running averages from a Python list to a NumPy array.
avg = np.array(avg)

In [317]:
# Fit a degree-6 polynomial to running average vs. normalised time;
# `args` holds the fitted coefficients.
args = np.polyfit(normal_time, avg, 6)

In [318]:
# x: the normalised times, reused as the abscissa for plotting.
x = normal_time
# f: the fitted coefficients wrapped as a callable polynomial.
f = np.poly1d(args)

In [319]:
# Show the fitted polynomial. The parenthesised form prints the same text on
# Python 2 and is also valid on Python 3, where `print f` is a SyntaxError.
print(f)


            6           5         4        3         2
-0.0004657 x + 0.03986 x - 1.405 x + 26.1 x - 269.3 x + 1462 x - 3258

In [320]:
# Overlay the fitted polynomial (solid line) on the running-average values
# (circle markers), both plotted against the normalised time.
plt.plot(x, f(x), '-', x, avg, 'o')


Out[320]:
[<matplotlib.lines.Line2D at 0x10db5f710>,
 <matplotlib.lines.Line2D at 0x10db5f990>]

In [ ]: