In [2]:
%matplotlib inline
import pandas as pd
In [7]:
# Load the per-day extract as-is and the per-minute extract indexed by its
# parsed 'time' column, then preview the per-minute frame.
daily = pd.read_csv('extract.csv')
minutely = pd.read_csv(
    'extract_raw.csv',
    parse_dates=['time'],
    index_col='time',
)
minutely.head()
Out[7]:
In [4]:
# RawActivity
# 4 = light sleep
# 5 = deep sleep
In [9]:
# Bar-plot RawActivity for the first 20 rows whose RawActivity is 4 or 5.
minutely[minutely['RawActivity'].isin([4, 5])].head(20).plot(kind='bar', y='RawActivity')
Out[9]:
In [10]:
# Keep only the rows whose RawActivity is 4 or 5.
sleep = minutely[minutely['RawActivity'].isin([4, 5])]
In [27]:
# Rounded mean RawActivity per 5-minute bin.
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
sleep.groupby(pd.Grouper(freq='5Min'))['RawActivity'].mean().round()
Out[27]:
In [49]:
from datetime import datetime

# Row counts per RawActivity value in `sleep`, 2015-06-24 02:44 through 03:57.
st = datetime(2015, 6, 24, 2, 44)
ed = datetime(2015, 6, 24, 3, 57)
sleep.loc[st:ed].groupby('RawActivity').count()
Out[49]:
In [50]:
# Row counts per RawActivity value in `sleep`, 2015-06-24 00:43 through 01:17.
st = datetime(2015, 6, 24, 0, 43)
ed = datetime(2015, 6, 24, 1, 17)
sleep.loc[st:ed].groupby('RawActivity').count()
Out[50]:
In [51]:
# Same st/ed window as currently set, but counted over every row of `minutely`.
minutely.loc[st:ed].groupby('RawActivity').count()
Out[51]:
In [68]:
# Row counts per RawActivity value, 2015-06-24 01:18 through 02:17.
st = datetime(2015, 6, 24, 1, 18)
ed = datetime(2015, 6, 24, 2, 17)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[68]:
In [67]:
# Row counts per RawActivity value, 2015-06-24 02:18 through 02:43.
st = datetime(2015, 6, 24, 2, 18)
ed = datetime(2015, 6, 24, 2, 43)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[67]:
In [66]:
# Row counts per RawActivity value, 2015-06-24 02:44 through 03:57.
st = datetime(2015, 6, 24, 2, 44)
ed = datetime(2015, 6, 24, 3, 57)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[66]:
In [75]:
# Raw underlying array for the rows from 2015-06-24 02:44 through 03:57.
st = datetime(2015, 6, 24, 2, 44)
ed = datetime(2015, 6, 24, 3, 57)
minutely.loc[st:ed].values
Out[75]:
In [65]:
# Row counts per RawActivity value, 2015-06-24 03:58 through 04:15.
st = datetime(2015, 6, 24, 3, 58)
ed = datetime(2015, 6, 24, 4, 15)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[65]:
In [64]:
# Row counts per RawActivity value, 2015-06-24 04:16 through 04:34.
st = datetime(2015, 6, 24, 4, 16)
ed = datetime(2015, 6, 24, 4, 34)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[64]:
In [63]:
# Row counts per RawActivity value, 2015-06-24 04:35 through 04:54.
st = datetime(2015, 6, 24, 4, 35)
ed = datetime(2015, 6, 24, 4, 54)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[63]:
In [72]:
# Show the raw rows themselves for 2015-06-24 04:35 through 04:54.
st = datetime(2015, 6, 24, 4, 35)
ed = datetime(2015, 6, 24, 4, 54)
minutely.loc[st:ed]
Out[72]:
In [62]:
# Row counts per RawActivity value, 2015-06-24 04:55 through 05:56.
st = datetime(2015, 6, 24, 4, 55)
ed = datetime(2015, 6, 24, 5, 56)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[62]:
In [61]:
# Row counts per RawActivity value, 2015-06-24 05:58 through 06:18.
st = datetime(2015, 6, 24, 5, 58)
ed = datetime(2015, 6, 24, 6, 18)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[61]:
In [69]:
# Row counts per RawActivity value, 2015-06-24 06:19 through 07:37.
st = datetime(2015, 6, 24, 6, 19)
ed = datetime(2015, 6, 24, 7, 37)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[69]:
In [70]:
# Row counts per RawActivity value, 2015-06-24 07:38 through 07:54.
st = datetime(2015, 6, 24, 7, 38)
ed = datetime(2015, 6, 24, 7, 54)
minutely.loc[st:ed].groupby('RawActivity').count()
Out[70]:
In [104]:
# Plot the rounded mean RawActivity per 2-minute bin for 2015-06-24 01:00-08:00.
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
st = datetime(year=2015, month=6, day=24, hour=1, minute=0)
ed = datetime(year=2015, month=6, day=24, hour=8, minute=0)
minutely.loc[st:ed].groupby(pd.Grouper(freq='2Min'))['RawActivity'].mean().round().plot()
Out[104]:
In [105]:
# Plot the rounded mean RawActivity per 2-minute bin for 2015-06-22 00:00-06:55.
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
st = datetime(year=2015, month=6, day=22, hour=0, minute=0)
ed = datetime(year=2015, month=6, day=22, hour=6, minute=55)
minutely.loc[st:ed].groupby(pd.Grouper(freq='2Min'))['RawActivity'].mean().round().plot()
Out[105]:
In [109]:
# Plot the rounded mean RawActivity per 2-minute bin from 2015-06-20 22:41
# to 2015-06-21 08:22.
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
st = datetime(year=2015, month=6, day=20, hour=22, minute=41)
ed = datetime(year=2015, month=6, day=21, hour=8, minute=22)
minutely.loc[st:ed].groupby(pd.Grouper(freq='2Min'))['RawActivity'].mean().round().plot()
Out[109]:
In [110]:
# Plot the rounded mean RawActivity per 2-minute bin from 2015-06-19 23:51
# to 2015-06-20 06:51.
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
st = datetime(year=2015, month=6, day=19, hour=23, minute=51)
ed = datetime(year=2015, month=6, day=20, hour=6, minute=51)
minutely.loc[st:ed].groupby(pd.Grouper(freq='2Min'))['RawActivity'].mean().round().plot()
Out[110]:
In [119]:
# How often each RawActivity value occurs from 2015-06-19 23:51 through
# 2015-06-20 03:00.
st = datetime(2015, 6, 19, 23, 51)
ed = datetime(2015, 6, 20, 3, 0)
minutely.loc[st:ed, 'RawActivity'].value_counts()
Out[119]:
In [ ]:
# Plot the rounded 2-minute mean RawActivity for whatever st/ed window is
# currently set (this cell defines neither).
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
minutely.loc[st:ed].groupby(pd.Grouper(freq='2Min'))['RawActivity'].mean().round().plot()
In [121]:
from sklearn.ensemble import RandomForestClassifier
In [122]:
# How often each RawActivity value occurs on 2015-06-24, 00:00 through 23:59.
st = datetime(2015, 6, 24, 0, 0)
ed = datetime(2015, 6, 24, 23, 59)
minutely.loc[st:ed, 'RawActivity'].value_counts()
Out[122]:
In [125]:
# Reload the per-day extract, this time indexed by 'Date' and with the
# Date / SleepStart / SleepEnd columns parsed as datetimes.
daily = pd.read_csv(
    'extract.csv',
    parse_dates=['Date', 'SleepStart', 'SleepEnd'],
    index_col='Date',
)
In [126]:
# Preview the first five rows of the per-day frame.
daily.head(n=5)
Out[126]:
In [131]:
# Look up the per-day row for 2015-06-24 by its index label.
# .loc replaces .ix, which was removed in pandas 1.0.
known_day = daily.loc[datetime(year=2015, month=6, day=24)]
known_day
Out[131]:
In [134]:
# Pull the recorded sleep window out of the selected day and display it.
sleep_start = known_day['SleepStart']
sleep_end = known_day['SleepEnd']
(sleep_start, sleep_end)
Out[134]:
In [135]:
# Per-minute rows from sleep_start through sleep_end.
minutely.loc[sleep_start:sleep_end]
Out[135]:
In [139]:
# Group the sleep window into 10-minute bins (lazy groupby object).
# pd.Grouper replaces pd.TimeGrouper, which was removed in pandas 1.0.
data = minutely.loc[sleep_start:sleep_end].groupby(pd.Grouper(freq='10Min'))
In [285]:
# Line plot of RawSensorData over the sleep window.
minutely.loc[sleep_start:sleep_end].plot(y='RawSensorData')
Out[285]:
In [163]:
# Work on an explicit copy of the sleep window: adding columns to a bare slice
# of `minutely` triggers SettingWithCopyWarning and may or may not write
# through to the parent frame.
chunk = minutely.loc[sleep_start:sleep_end].copy()
# Indicator columns, initialised to 0 for every row.
chunk['RawActivity4'] = 0
chunk['RawActivity5'] = 0
chunk
Out[163]:
In [164]:
# Set each indicator column to 1 on the rows whose RawActivity matches.
# .loc replaces .ix, which was removed in pandas 1.0.
chunk.loc[chunk['RawActivity'] == 4, 'RawActivity4'] = 1
chunk.loc[chunk['RawActivity'] == 5, 'RawActivity5'] = 1
chunk
Out[164]:
In [165]:
# Total of the RawActivity4 indicator column.
chunk.loc[:, 'RawActivity4'].sum()
Out[165]:
In [166]:
# Total of the RawActivity5 indicator column.
chunk.loc[:, 'RawActivity5'].sum()
Out[166]:
In [241]:
# Per-5-minute sums of the two indicator columns and the raw sensor reading.
# Column order matters downstream: position 2 is RawSensorData.
# pd.Grouper replaces the removed pd.TimeGrouper, and the columns are selected
# with a list because tuple-style selection on a groupby is no longer accepted.
feats = (
    chunk
    .groupby(pd.Grouper(freq='5Min'))[['RawActivity4', 'RawActivity5', 'RawSensorData']]
    .sum()
)
feats
Out[241]:
In [290]:
# Hand-entered (start, end) intervals treated as deep sleep on 2015-06-24.
# NOTE(review): the second interval ends at 03:37, while the exploratory cells
# inspect 02:44-03:57 -- confirm whether 03:37 is intended.
deep_sleep = [
    (datetime(year=2015, month=6, day=24, hour=1, minute=18), datetime(year=2015, month=6, day=24, hour=2, minute=17)),
    (datetime(year=2015, month=6, day=24, hour=2, minute=44), datetime(year=2015, month=6, day=24, hour=3, minute=37)),
    (datetime(year=2015, month=6, day=24, hour=4, minute=16), datetime(year=2015, month=6, day=24, hour=4, minute=34)),
    (datetime(year=2015, month=6, day=24, hour=4, minute=55), datetime(year=2015, month=6, day=24, hour=5, minute=56)),
    (datetime(year=2015, month=6, day=24, hour=6, minute=19), datetime(year=2015, month=6, day=24, hour=7, minute=37)),
]

X = []            # one single-feature row per 5-minute bin
Y = []            # True when the bin's timestamp falls inside a deep-sleep interval
sensor_data = []  # the same feature as a flat list

# Iterate the index's own Timestamps instead of round-tripping through epoch
# nanoseconds and datetime.fromtimestamp(): that call converts to the machine's
# local timezone and therefore shifted every bin by the local UTC offset.
for stamp, features in zip(feats.index, feats.values):
    date = stamp.to_pydatetime()
    # Strict comparison on both ends, as before: a bin stamped exactly on an
    # interval boundary is labelled False.
    # NOTE(review): the 04:55 bin coincides with an interval start -- confirm
    # that excluding it is intended.
    label = any(s < date < e for s, e in deep_sleep)
    sensor_data.append(features[2])  # third feats column (RawSensorData sum)
    X.append([features[2]])
    Y.append(label)
X
Out[290]:
In [258]:
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
In [257]:
# Random-forest baseline: shuffle the (features, label) pairs together, train
# on the first 70 and report accuracy on the remainder.
XY = list(zip(X, Y))
np.random.shuffle(XY)
XY_train = XY[:70]
XY_test = XY[70:]
X_train, Y_train = zip(*XY_train)
X_test, Y_test = zip(*XY_test)

rf = RandomForestClassifier()
rf.fit(X_train, Y_train)
predicted = rf.predict(X_test)
metrics.accuracy_score(y_true=Y_test, y_pred=predicted)
Out[257]:
In [271]:
# Logistic-regression baseline: shuffle the (features, label) pairs together,
# train on the first 70 and report accuracy on the remainder.
XY = list(zip(X, Y))
np.random.shuffle(XY)
XY_train = XY[:70]
XY_test = XY[70:]
X_train, Y_train = zip(*XY_train)
X_test, Y_test = zip(*XY_test)

lr = LogisticRegression()
lr.fit(X_train, Y_train)
predicted = lr.predict(X_test)
metrics.accuracy_score(y_true=Y_test, y_pred=predicted)
Out[271]:
In [272]:
import pylab as plt
In [278]:
# Strip plot: every feature value drawn as an 'x' marker on the y=0 line.
plt.plot(
    X,
    np.zeros_like(X),
    'x',
)
Out[278]:
In [236]:
Out[236]:
In [237]:
In [238]:
Out[238]:
In [239]:
# Confusion matrix for the most recent Y_test / predicted pair.
metrics.confusion_matrix(y_true=Y_test, y_pred=predicted)
Out[239]:
In [282]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive-Bayes baseline: shuffle the (features, label) pairs together,
# train on the first 70 and report accuracy on the remainder.
XY = list(zip(X, Y))
np.random.shuffle(XY)
XY_train = XY[:70]
XY_test = XY[70:]
X_train, Y_train = zip(*XY_train)
X_test, Y_test = zip(*XY_test)

m = GaussianNB()
m.fit(X_train, Y_train)
predicted = m.predict(X_test)
metrics.accuracy_score(y_true=Y_test, y_pred=predicted)
Out[282]:
In [288]:
def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D array by convolving it with a normalised window.

    The signal is extended at both ends with point-reflected copies of itself
    before the convolution, and the extension is trimmed off afterwards, so
    the result has the same length as ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional input signal with at least ``window_len`` samples.
    window_len : int
        Size of the smoothing window. Values below 3 return ``x`` unchanged.
    window : str
        One of 'flat' (moving average), 'hanning', 'hamming', 'bartlett',
        'blackman'; the non-flat names are looked up on numpy.

    Returns
    -------
    numpy.ndarray
        The smoothed signal (or ``x`` itself when ``window_len < 3``).

    Raises
    ------
    ValueError
        If ``x`` is not 1-D, is shorter than ``window_len``, or ``window`` is
        not a supported name.
    """
    # `raise (ValueError, msg)` raises a tuple, which is a TypeError on
    # Python 3; construct the exception properly instead.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with window_len reflected samples in front and window_len - 1 behind.
    s = np.r_[2*x[0]-x[window_len-1::-1], x, 2*x[-1]-x[-1:-window_len:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Attribute lookup instead of eval(); `window` was validated above.
        w = getattr(np, window)(window_len)
    y = np.convolve(w/w.sum(), s, mode='same')
    # Drop the padding again.
    return y[window_len:-window_len+1]
In [327]:
# Smooth the per-bin sensor sums, then binarise: 1 where the smoothed value is
# at or below the threshold, 0 where it is above.
sensor_data_smoothed = smooth(np.array(sensor_data), window='hanning')
#plt.plot(1 - sensor_data_smoothed)
arr = np.array(sensor_data_smoothed)
thresh = 15
# Order matters: the 1s written first must not exceed thresh themselves.
arr[arr <= thresh] = 1
arr[arr > thresh] = 0
plt.plot(arr)
plt.ylim([-1, 2])
Out[327]:
In [318]:
Out[318]:
In [338]:
# Binarised, smoothed per-minute sensor signal for the sleep window recorded
# on 2015-06-22.
# .loc replaces .ix, which was removed in pandas 1.0.
known_day = daily.loc[datetime(year=2015, month=6, day=22)]
start, end = known_day['SleepStart'], known_day['SleepEnd']
chunk = minutely.loc[start:end]
sensor_data = chunk['RawSensorData'].values
sensor_data_smoothed = smooth(np.array(sensor_data), window='hanning')
arr = np.array(sensor_data_smoothed)
thresh = 10
# 1 where the smoothed value is at or below thresh, 0 where it is above.
# Order matters: the 1s written first must not exceed thresh themselves.
arr[arr <= thresh] = 1
arr[arr > thresh] = 0
plt.plot(arr)
plt.ylim([-1,2])
Out[338]:
In [448]:
# Smoothed, clipped per-minute sensor signal (grey) and its thresholded version
# (red) for the sleep window recorded on 2015-06-21.
# .loc replaces .ix, which was removed in pandas 1.0.
known_day = daily.loc[datetime(year=2015, month=6, day=21)]
start, end = known_day['SleepStart'], known_day['SleepEnd']
chunk = minutely.loc[start:end]
# Cap readings at 20 in a NEW array: assigning into `.values` in place can
# write through to `minutely` and fails when pandas hands out a read-only view.
sensor_data = np.minimum(chunk['RawSensorData'].values, 20)
sds = pd.Series(sensor_data)
plt.figure(figsize=(20,10))
sensor_data_smoothed = smooth(sensor_data, window='hanning', window_len=24)
plt.plot(sensor_data_smoothed, c='grey')
arr = np.array(sensor_data_smoothed)
thresh = 3
# Red trace: thresh where the smoothed value is at or below thresh, 0 above it.
arr[arr <= thresh] = thresh
arr[arr > thresh] = 0
plt.plot(arr, c='red')
plt.ylim([-20,80])
Out[448]:
In [450]:
# Smoothed, clipped per-minute sensor signal (grey) and its thresholded version
# (red) for the sleep window recorded on 2015-06-24.
# .loc replaces .ix, which was removed in pandas 1.0.
known_day = daily.loc[datetime(year=2015, month=6, day=24)]
start, end = known_day['SleepStart'], known_day['SleepEnd']
chunk = minutely.loc[start:end]
# Cap readings at 20 in a NEW array: assigning into `.values` in place can
# write through to `minutely` and fails when pandas hands out a read-only view.
sensor_data = np.minimum(chunk['RawSensorData'].values, 20)
sds = pd.Series(sensor_data)
plt.figure(figsize=(20,10))
sensor_data_smoothed = smooth(sensor_data, window='hanning', window_len=24)
plt.plot(sensor_data_smoothed, c='grey')
arr = np.array(sensor_data_smoothed)
thresh = 2.5
# Red trace: thresh where the smoothed value is at or below thresh, 0 above it.
arr[arr <= thresh] = thresh
arr[arr > thresh] = 0
plt.plot(arr, c='red')
plt.ylim([-20,80])
Out[450]:
In [449]:
# Smoothed, clipped per-minute sensor signal (grey) and its thresholded version
# (red) for the sleep window recorded on 2015-06-20.
# .loc replaces .ix, which was removed in pandas 1.0.
known_day = daily.loc[datetime(year=2015, month=6, day=20)]
start, end = known_day['SleepStart'], known_day['SleepEnd']
chunk = minutely.loc[start:end]
# Cap readings at 20 in a NEW array: assigning into `.values` in place can
# write through to `minutely` and fails when pandas hands out a read-only view.
sensor_data = np.minimum(chunk['RawSensorData'].values, 20)
plt.figure(figsize=(20,10))
sensor_data_smoothed = smooth(sensor_data, window='hanning', window_len=24)
plt.plot(sensor_data_smoothed, c='grey')
arr = np.array(sensor_data_smoothed)
thresh = 3
# Red trace: thresh where the smoothed value is at or below thresh, 0 above it.
arr[arr <= thresh] = thresh
arr[arr > thresh] = 0
plt.plot(arr, c='red')
plt.ylim([-20,80])
Out[449]:
In [ ]: