In [3]:
import numpy as np
import matplotlib.pyplot as plt
import MySQLdb
%matplotlib inline

In [4]:
db = MySQLdb.connect(host="localhost", user="root", db = "home_kitchen")
cursor = db.cursor()

In [5]:
tablename = 'all_hk'

In [17]:
PIDlistquery = 'Select PID from (SELECT PID, RScore, COUNT(*) AS magnitude FROM all_hk GROUP BY PID HAVING magnitude > 500 ORDER BY magnitude ASC LIMIT 200) as a'
cursor.execute(PIDlistquery)
Pidlist = cursor.fetchall()

In [42]:
pidlist = zip(*Pidlist)[0]

In [43]:
def running_avg(data):
    avg = [0]*len(data)
    total = 0
    for k in range(len(data)):
        #avg[k] = np.mean(data[:k])
        total += data[k]
        avg[k] = float(total/(k+1))
    return avg

In [55]:
PID = pidlist[2]
print PID


 B000P1NYE8

In [56]:
sql = "Select RTime, RScore From " +tablename + " Where PID = " + '"' + PID +'";'

In [57]:
cursor.execute(sql)
data = cursor.fetchall()
data = sorted(data)

In [58]:
rating = zip(*data)[1]
time = zip(*data)[0]
avg =[]
normal_time = [0]*len(time)

In [59]:
avg = running_avg(rating)

In [60]:
nums = np.arange(len(avg))

In [61]:
diffs = [avg[i] - avg[len(avg)-1] for i in range(len(avg))]

In [62]:
plt.scatter(nums, diffs)


Out[62]:
<matplotlib.collections.PathCollection at 0x1135284d0>

In [65]:
#how long til convergence within .1?
a = 0
for i in range(len(diffs)):
    if -.1<diffs[i]<.1:
        a = i
        
print a, len(diffs)


509 510

In [64]:
print PID


 B000P1NYE8

In [ ]: