In [1]:
%matplotlib notebook
from analyze import *
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
In [2]:
# Conditions key:
# b = buzz, s = silent
# j = Justin's WG, r = Seminar room, c = Cafeteria
# e = easy, h = hard
subject_meta = {
    14: 'bje',
    15: 'bre',
    16: 'sch',
    17: 'sce',
    18: 'sch',
    19: 'sce',
    20: 'bjh',
}
In [3]:
# Load the full dataset and extract one subject's head-distance time series.
data = load_all_data()
subject_id = 20
s20 = get_subject(data, subject_id)
baseline = get_baseline(s20)
times, widths = get_distances(s20)
# Convert absolute timestamps to seconds elapsed since the first sample
time_delta = [(t - times[0]).total_seconds() for t in times]
In [4]:
# Two stacked panels: raw series on top, outlier-filtered series below.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)

# Top panel: raw data
plot_it(ax1, time_delta, widths, conditions=subject_meta[subject_id], baseline=baseline)

# Exclude outliers: keep only samples within a z-score of `z_threshold`
# (limits errors caused by signal noise)
z_threshold = 3
time_keep, width_keep = remove_outliers(time_delta, widths, z_threshold)

# Bottom panel: cleaned data
title = """Head Proximity to Computer over Time
Excluding Outliers (z = {})
Subject ID: {} Condition: {}""".format(z_threshold, subject_id, subject_meta[subject_id])
plot_it(ax2, time_keep, width_keep, conditions=subject_meta[subject_id],
        baseline=baseline, title=title)

plt.tight_layout()
plt.show()
In [5]:
# Overview figure: one panel per subject, with the same z-score outlier
# filtering applied (handled inside plot_subjects).
plot_subjects(data, subject_meta, exclude_outliers=True)
In [13]:
# NOTE(review): `csv` appears unused in this notebook (pandas handles all the
# I/O below); kept in case an unseen cell relies on it.
import csv
import pandas as pd

# Collect one DataFrame per subject, each indexed by seconds elapsed since
# that subject's first sample. `keys` records the matching subject ids for
# the pd.concat in the next cell.
df_list = []
keys = []
for subject_id in subject_meta.keys():
    subject_data = get_subject(data, subject_id)
    # presumably needed for normalization elsewhere — unused in this cell
    baseline = get_baseline(subject_data)
    times, widths = get_distances(subject_data)
    time_delta = [(t - times[0]).total_seconds() for t in times]
    # Optional: trim to about the first 20 minutes (~30 samples/minute)
    # time_delta = time_delta[:21*30]
    # widths = widths[:21*30]
    df = pd.DataFrame(pd.Series(widths, index=time_delta))
    df_list.append(df)
    keys.append(subject_id)
In [14]:
# Stack all per-subject frames into one MultiIndexed frame:
# outer level = subject id, inner level = seconds since that subject's start.
df = pd.concat(df_list, keys = keys)
# Display the combined frame (rich notebook output)
df
Out[14]:
In [8]:
# Place all subjects onto one common 1-second-resolution Timedelta index so
# the series line up column-wise; seconds with no sample become NaN and are
# filled in the next cell.
MAX_SECONDS = 1240  # spans the longest recording — TODO confirm against data
index = [pd.Timedelta(np.timedelta64(x, 's')) for x in range(1, MAX_SECONDS)]
df = pd.DataFrame(index=index)
data = load_all_data()
for subject_id in subject_meta.keys():
    subject_data = get_subject(data, subject_id)
    times, widths = get_distances(subject_data)
    # Timedelta offsets from each subject's first sample
    time_delta = [(t - times[0]) for t in times]
    # Assigning a Series reindexes it onto `index`; unmatched seconds -> NaN.
    df[subject_id] = pd.Series(widths, index=time_delta)
In [9]:
# Fill missing values: linear interpolation between neighbors, then forward-
# and backward-fill the leading/trailing NaNs interpolation cannot reach.
df_clean = df.interpolate()
# .bfill() replaces the deprecated fillna(method='backfill'); astype(int)
# replaces the deprecated DataFrame.applymap(int) for this numeric frame.
df_clean = df_clean.ffill().bfill().astype(int)
# Save to csv file for anova analysis in R
df_clean.to_csv('posture_data.csv')