In [1]:
%matplotlib inline
import time
import os
import sqlite3
import pandas as pd
import numpy as np
import scipy as sp
import scipy.signal as sig
import pywt # PyWavelets
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import seaborn as sns
Load table of unprocessed accelerometer data.
In [2]:
# Load the full table of unprocessed accelerometer samples from the SQLite file.
dbase = sqlite3.connect("disco_parser.db")
try:
    dframe = pd.read_sql_query("SELECT * FROM unprocessed", dbase)
finally:
    dbase.close()  # always release the connection, even if the query fails
In [3]:
dframe.head()  # Preview the first rows of the unprocessed table
Out[3]:
Line chart of the three accelerometer axes. (Subject: 1a)
In [2]:
# Load accelerometer data for subject
# Load accelerometer data for subject
subject = '1a'
db = sqlite3.connect("disco_parser.db")
# Parameterized query instead of string concatenation: avoids SQL-injection /
# quoting problems if `subject` ever contains a quote character.
df0 = pd.read_sql_query("SELECT id, t, x, y, z FROM unprocessed WHERE id = ?",
                        db, params=(subject,))
db.close()
In [3]:
# Fill in missing samples with NaNs
# Fill in missing samples with NaNs.
# Build a complete millisecond index spanning the recording, then left-merge the
# sampled rows onto it; timepoints with no sample become NaN rows automatically.
# (The original built placeholder arrays with `v1 = v2 = v3 = np.empty(...)`,
# which binds all three names to the SAME array — a latent aliasing bug — and
# those x0/y0/z0 columns were discarded by the final column selection anyway.
# It also used np.NAN, which was removed in NumPy 2.0.)
t = np.arange(df0['t'].values[0], df0['t'].values[-1])
df1 = pd.DataFrame(dict(t=t))
df2 = df0[['t', 'x', 'y', 'z']]
df = df1.merge(df2, how='left', on='t')
df = df[['t', 'x', 'y', 'z']]
In [4]:
# Session start and end times (ms)
start, end = 55544580, 57725830
# Positions of the first/last session samples (end index exclusive)
first = np.where(df['t'] == start)[0][0]
last = np.where(df['t'] == end)[0][0] + 1
T = df['t'][first:last].values  # Milliseconds
X = df['x'][first:last].values  # X-axis
Y = df['y'][first:last].values  # Y-axis
Z = df['z'][first:last].values  # Z-axis
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots()
    for series, label in ((X, 'X-axis'), (Y, 'Y-axis'), (Z, 'Z-axis')):
        ax.plot(T, series, label=label)
    ax.axis([start, end, -2, 2])
    ax.ticklabel_format(style='plain', axis='x')
    ax.legend()
    ax.set_xlabel("Timepoint (ms)")
    ax.set_ylabel("Acceleration (m/s^2)")
    ax.set_title("Subject " + subject + ": Unprocessed Accelerometer Data")
    plt.show()
Combine the three axis measures into one timeseries. (Options: vector, xalign, yalign, zalign)
In [4]:
def disco_combine(inputdata, outputdir, combotype):
    ''' Combines the three axis measures into one measure
    inputdata = Input data table (id,t,x,y,z)
    outputdir = Output data directory (the result is also pickled there)
    combotype = Combination type ('vector','xalign','yalign','zalign')
    Returns: timeseries = Combined axis values (id,t,v)
    Raises: ValueError if combotype is not one of the four supported types '''
    function = 'disco_combine'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Running', function, combotype)
    # Fail fast on an unknown type; the original fell through all branches and
    # died later with a confusing NameError at the save step.
    if combotype not in ('vector', 'xalign', 'yalign', 'zalign'):
        raise ValueError("combotype must be 'vector', 'xalign', 'yalign' or 'zalign'")
    subjects = inputdata.id.unique()
    pieces = []  # per-subject frames; a single concat at the end avoids quadratic copying
    for s in subjects:
        print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), s)
        rows = inputdata[inputdata.id == s]
        x = rows.x.values
        y = rows.y.values
        z = rows.z.values
        # All four measures share the magnitude SQRT(X^2 + Y^2 + Z^2); the
        # original duplicated this whole loop once per combotype.
        magnitude = np.sqrt(x**2 + y**2 + z**2)
        if combotype == 'vector':
            v = magnitude
        elif combotype == 'xalign':   # ARCSIN(X / magnitude)
            v = np.arcsin(x / magnitude)
        elif combotype == 'yalign':   # ARCSIN(Y / magnitude)
            v = np.arcsin(y / magnitude)
        else:                         # 'zalign': ARCSIN(Z / magnitude)
            v = np.arcsin(z / magnitude)
        pieces.append(pd.DataFrame({'id': rows.id, 't': rows.t, 'v': v}))
    timeseries = pd.concat(pieces) if pieces else pd.DataFrame({})
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Saving', function+'_'+combotype+'.pkl')
    timeseries.to_pickle(os.path.join('.', outputdir, function+'_'+combotype+'.pkl'))
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Done')
    return timeseries
In [5]:
# Build all four combined-axis measures for every subject.
# Each call also writes disco_combine_<type>.pkl to the current directory.
vector = disco_combine(dframe,'','vector')
xalign = disco_combine(dframe,'','xalign')
yalign = disco_combine(dframe,'','yalign')
zalign = disco_combine(dframe,'','zalign')
In [6]:
vector.head()  # Preview the magnitude measure
Out[6]:
In [7]:
xalign.head()  # Preview the x-aligned measure
Out[7]:
In [8]:
yalign.head()  # Preview the y-aligned measure
Out[8]:
In [9]:
zalign.head()  # Preview the z-aligned measure
Out[9]:
Add combined axes columns to table of unprocessed data.
In [12]:
# Append each combined measure as a new column of the unprocessed table.
# Concat on axis=1 aligns rows by index, which the combine step preserved.
combodict = {'vector': vector, 'xalign': xalign, 'yalign': yalign, 'zalign': zalign}
for name, combined in combodict.items():
    dframe = pd.concat([dframe, combined.rename(columns={'v': name})[name]], axis=1)
In [13]:
dframe.head()  # Confirm the four new combined-axis columns were appended
Out[13]:
Replace old database with new database including combined axes columns.
In [14]:
# Schema for the rebuilt table: id + raw axes + the four combined measures.
combo_schema = """
DROP TABLE IF EXISTS "unprocessed";
CREATE TABLE "unprocessed" (
"id" VARCHAR,
"t" FLOAT,
"x" FLOAT,
"y" FLOAT,
"z" FLOAT,
"vector" FLOAT,
"xalign" FLOAT,
"yalign" FLOAT,
"zalign" FLOAT
);
"""
dbase = sqlite3.connect("disco_parser.db")
# NOTE(review): to_sql(if_exists="replace") drops and recreates the table with
# pandas-inferred column types, so the hand-written schema above is immediately
# discarded and the executescript call is effectively redundant — confirm
# whether if_exists="append" (which would keep the declared FLOAT schema) was
# the intent.
dbase.cursor().executescript(combo_schema); dbase.commit()
dframe.to_sql("unprocessed",dbase,if_exists="replace",index=False); dbase.commit()
dbase.close()
Line chart of the combined axes. (Type: z-align)
In [2]:
# Load combined accelerometer axes for subject
subject = '1a'
db = sqlite3.connect("disco_parser.db")
# Parameterized query instead of string concatenation: avoids SQL-injection /
# quoting problems if `subject` ever contains a quote character.
df0 = pd.read_sql_query("SELECT id, t, zalign FROM unprocessed WHERE id = ?",
                        db, params=(subject,))
db.close()
In [3]:
# Fill in missing samples with NaNs: left-merge the sampled rows onto a
# complete millisecond index; absent timepoints become NaN automatically.
t = np.arange(df0['t'].values[0], df0['t'].values[-1])
full_index = pd.DataFrame({'t': t})
sampled = df0[['t', 'zalign']]
df = full_index.merge(sampled, how='left', on='t')
In [4]:
# Session start and end times (ms)
start, end = 55544580, 57725830
# Positions of the first/last session samples (end index exclusive)
first = np.where(df['t'] == start)[0][0]
last = np.where(df['t'] == end)[0][0] + 1
T = df['t'][first:last].values       # Milliseconds
V = df['zalign'][first:last].values  # Z-aligned measure
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots()
    ax.plot(T, V)
    ax.axis([start, end, -2, 2])
    ax.ticklabel_format(style='plain', axis='x')
    ax.set_xlabel("Timepoint (ms)")
    ax.set_ylabel("Acceleration (m/s^2)")
    ax.set_title("Subject " + subject + ": Combined Accelerometer Axes (Z-align)")
    plt.show()
Load combined axes timeseries.
In [15]:
# Reload the table with the new combined-axes columns.
dbase = sqlite3.connect("disco_parser.db")
try:
    dframe = pd.read_sql_query(
        "SELECT id, t, vector, xalign, yalign, zalign FROM unprocessed", dbase)
finally:
    dbase.close()  # always release the connection, even if the query fails
dframe.head()
Out[15]:
Interpolate combined axes timeseries. (Options: linear, cubic [PCHIP, shape-preserving], nearest)
In [16]:
def disco_interpolate(inputdata, combotype, outputdir, interptype, startstring, endstring):
    ''' Interpolates accelerometer timeseries to a 1 ms grid
    inputdata = Input data table (id,t,v[vector/xalign/yalign/zalign])
    combotype = 'vector' / 'xalign' / 'yalign' / 'zalign' (only names the output file)
    outputdir = Output data directory
    interptype = Interpolation type ('linear','cubic','nearest')
                 NOTE: 'cubic' uses scipy's PCHIP (shape-preserving monotone
                 cubic), not an ordinary cubic spline
    startstring = Event start time (HH:MM:SS:MS)
    endstring = Event end time (HH:MM:SS:MS)
    Returns: timeseries = Interpolated values (id,t,v)
    Raises: ValueError if interptype is not one of the three supported types '''
    function = 'disco_interpolate'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Running', function, interptype)
    # Fail fast: the original left F undefined for an unknown interptype and
    # crashed later with a NameError.
    if interptype not in ('linear', 'nearest', 'cubic'):
        raise ValueError("interptype must be 'linear', 'nearest' or 'cubic'")
    # Event start and end times in milliseconds
    h, m, sec, ms = (int(num) for num in startstring.split(':'))
    starttime = ((h*60 + m)*60 + sec)*1000 + ms
    h, m, sec, ms = (int(num) for num in endstring.split(':'))
    endtime = ((h*60 + m)*60 + sec)*1000 + ms
    subjects = inputdata.id.unique()
    pieces = []  # per-subject frames; one concat at the end avoids quadratic copying
    for s in subjects:
        print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), s)
        X = inputdata[inputdata.id == s].t.values  # Sampled milliseconds
        Y = inputdata[inputdata.id == s].v.values  # Sampled acceleration
        U, I = np.unique(X, return_index=True)     # Find unique milliseconds
        X = X[I]  # Remove duplicate milliseconds (i.e., 2 samples in 1 ms)
        Y = Y[I]  # Remove corresponding acceleration samples
        if interptype == 'cubic':
            F = sp.interpolate.PchipInterpolator(X, Y)
        else:  # 'linear' or 'nearest'
            F = sp.interpolate.interp1d(X, Y, kind=interptype)
        XQ = np.arange(X[0], X[-1]+1)  # Interpolated milliseconds (1 ms grid)
        YQ = F(XQ)                     # Interpolated acceleration samples
        startindex = np.where(XQ == starttime)[0][0]
        endindex = np.where(XQ == endtime)[0][0]
        pieces.append(pd.DataFrame({'id': [s]*(endindex-startindex+1),
                                    't': XQ[startindex:endindex+1],
                                    'v': YQ[startindex:endindex+1]}))
    timeseries = pd.concat(pieces) if pieces else pd.DataFrame({})
    outputfile = function+'_'+interptype+'_'+combotype+'.pkl'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Saving', outputfile)
    timeseries.to_pickle(os.path.join('.', outputdir, outputfile))
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Done')
    return timeseries
In [17]:
# Reshape the z-aligned column into the (id,t,v) layout disco_interpolate expects
zalign = dframe[['id','t','zalign']].rename(columns={'zalign':'v'})
zalign.head()
Out[17]:
In [18]:
# Interpolate the z-aligned measure with each method over the same event window
linear_zalign = disco_interpolate(zalign,'zalign','', 'linear','15:25:44:580','16:02:05:830')
nearest_zalign = disco_interpolate(zalign,'zalign','','nearest','15:25:44:580','16:02:05:830')
cubic_zalign = disco_interpolate(zalign,'zalign','', 'cubic','15:25:44:580','16:02:05:830')
In [19]:
linear_zalign.head()  # Preview the linear interpolation
Out[19]:
In [20]:
nearest_zalign.head()  # Preview the nearest-neighbor interpolation
Out[20]:
In [21]:
cubic_zalign.head()  # Preview the cubic (PCHIP) interpolation
Out[21]:
Line chart of the interpolated timeseries. (Type: linear)
In [2]:
# Load the interpolated timeseries and restrict to the chosen subject
subject = '1a'
interp_all = pd.read_pickle('disco_interpolate_linear_zalign.pkl')
df = interp_all[interp_all.id == subject]
In [3]:
# Session start and end times (ms)
start, end = 55544580, 57725830
# Positions of the first/last session samples (end index exclusive)
first = np.where(df['t'] == start)[0][0]
last = np.where(df['t'] == end)[0][0] + 1
T = df['t'][first:last].values  # Time
V = df['v'][first:last].values  # Values
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots()
    ax.plot(T, V)
    ax.axis([start, end, -2, 2])
    ax.ticklabel_format(style='plain', axis='x')
    ax.set_xlabel("Timepoint (ms)")
    ax.set_ylabel("Acceleration (m/s^2)")
    ax.set_title("Subject " + subject + ": Interpolated Accelerometer Data (Linear)")
    plt.show()
Load interpolated timeseries.
In [22]:
# Reload the linearly interpolated timeseries from disk
linear_zalign = pd.read_pickle('disco_interpolate_linear_zalign.pkl')
linear_zalign.head()
Out[22]:
Downsample interpolated timeseries back to actual average sampling rate. (Options: average, decimate)
In [25]:
def disco_downsample(indata, intype, outdir, outtype, newrate):
    ''' Downsamples timeseries data (after a linear detrend)
    indata = Input data table (id,t,v)
    intype = ('linear','nearest','cubic') x ('vector','xalign','yalign','zalign')
    outdir = Output data directory
    outtype = Downsampling type ('average','decimate')
        - 'average': Replace each set of N samples by their mean
          (Preserves temporal shape of waveform)
        - 'decimate': Apply a low-pass filter before resampling
          (Preserves spectral information of waveform)
    newrate = New sampling rate in ms
        - e.g., ceil(mean of actual rate across Ss) = ceil(15.4120) = 16 ms
    Returns: timeseries = Downsampled values (id,t,v)
    Raises: ValueError if outtype is not 'average' or 'decimate' '''
    function = 'disco_downsample'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Running', function, outtype)
    # Fail fast: the original left downTS undefined for an unknown outtype and
    # crashed later with a NameError.
    if outtype not in ('average', 'decimate'):
        raise ValueError("outtype must be 'average' or 'decimate'")
    newrate = int(newrate)
    subjects = indata.id.unique()
    pieces = []  # per-subject frames; one concat at the end avoids quadratic copying
    for s in subjects:
        print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), s)
        # Detrend timeseries first (linear detrend)
        det = sig.detrend(indata[indata.id == s].v.values)
        # Pad so the length is divisible by 'newrate'. 'newrate' is rounded up
        # from the average sampling rate so we never use a faster rate.
        padlen = int(newrate * np.ceil(len(det) / newrate))
        padVL = padlen - len(det)  # Pad amount
        padTS = np.zeros(padlen)
        padTS[0:len(det)] = det
        msec = indata[indata.id == s].t.values
        padMS = np.zeros(padlen)
        padMS[0:len(msec)] = msec
        if padVL > 0:  # guard: np.mean of an EMPTY slice warns and yields NaN
            padRM = len(det) % newrate  # Size of the last (partial) window
            # Pad the signal with the mean of the last partial window
            padTS[len(det):] = np.mean(det[len(det)-padRM:])
            # Pad the time axis with additional milliseconds
            padMS[len(msec):] = np.arange(msec[-1]+1, msec[-1]+1+padVL)
        # Mean timepoint for each new sample
        downMS = np.mean(np.reshape(padMS, (padlen // newrate, newrate)), axis=1)
        # Downsample timeseries
        if outtype == 'average':  # Preserves temporal shape of waveform
            # Replace each set of N (=newrate) samples by their mean
            downTS = np.mean(np.reshape(padTS, (padlen // newrate, newrate)), axis=1)
        else:  # 'decimate': preserves spectral information of waveform
            # Resample at a lower rate after low-pass filtering (anti-aliasing).
            # NOTE(review): scipy's docs recommend decimating in stages when the
            # factor exceeds 13; newrate=16 is used by callers — confirm.
            downTS = sig.decimate(padTS, newrate)
        pieces.append(pd.DataFrame({'id': [s]*len(downTS), 't': downMS, 'v': downTS}))
    timeseries = pd.concat(pieces) if pieces else pd.DataFrame({})
    outfile = function+'_'+outtype+'_'+intype+'.pkl'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Saving', outfile)
    timeseries.to_pickle(os.path.join('.', outdir, outfile))
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Done')
    return timeseries
In [26]:
# Downsample to roughly the true mean sampling rate: ceil(15.4120) = 16 ms/sample
average_linear_zalign = disco_downsample(linear_zalign,'linear_zalign','','average',int(np.ceil(15.4120)))
decimate_linear_zalign = disco_downsample(linear_zalign,'linear_zalign','','decimate',int(np.ceil(15.4120)))
In [27]:
average_linear_zalign.head()  # Preview the window-averaged result
Out[27]:
In [28]:
decimate_linear_zalign.head()  # Preview the decimated result
Out[28]:
Line chart of the downsampled timeseries. (Type: average)
In [2]:
# Load downsampled timeseries for subject
subject = '1a'
df = pd.read_pickle('disco_downsample_average_linear_zalign.pkl')
df = df[df.id==subject]  # restrict to the chosen subject
In [3]:
# Session start and end times (ms)
start, end = 55544580, 57725830
# Downsampled timepoints no longer hit start/end exactly, so bracket with >=
first = np.where(df['t'] >= start)[0][0]
last = np.where(df['t'] >= end)[0][-1] + 1
T = df['t'][first:last].values  # Time
V = df['v'][first:last].values  # Values
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots()
    ax.plot(T, V)
    ax.axis([start, end, -2, 2])
    ax.ticklabel_format(style='plain', axis='x')
    ax.set_xlabel("Timepoint (ms)")
    ax.set_ylabel("Acceleration (m/s^2)")
    ax.set_title("Subject " + subject + ": Downsampled Accelerometer Data (Average)")
    plt.show()
Load downsampled timeseries.
In [29]:
# Reload the averaged, downsampled timeseries from disk
average_linear_zalign = pd.read_pickle('disco_downsample_average_linear_zalign.pkl')
average_linear_zalign.head()
Out[29]:
Wavelet decomposition of downsampled timeseries. (Options: coif1/3/5, db2/4/6, sym2/4/6)
In [177]:
def disco_decompose(indata, intype, outdir, outtype):
    ''' Wavelet decomposition of timeseries data (stationary wavelet transform)
    indata = Input data table (id,t,v)
    intype = ('ave','dec') x ('lin','nea','cub') x ('vec','xal','yal','zal')
    outdir = Output data directory
    outtype = Wavelet to use (coif1/3/5, db2/4/6, sym2/4/6)
    Returns: timeseries = Reconstructed per-level signals (id,t,L00,L01,...)
    Note: pywt.swt only accepts input whose length is divisible by 2**level
    (unlike MATLAB's modwt, which takes any length), so each subject's series
    is trimmed to the largest power-of-two length. '''
    function = 'disco_decompose'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Running', function, outtype)
    subjects = indata.id.unique()
    pieces = []  # per-subject frames; one concat at the end avoids quadratic copying
    for s in subjects:
        print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), s)
        D = indata[indata.id == s].v.values  # Timeseries data
        D = sp.stats.zscore(D)  # Normalization: mean = 0, variance = 1
        lev = int(np.floor(np.log2(len(D))))  # Maximum number of levels
        rem = int(len(D) % 2**lev)  # Samples to remove so divisible by 2**lev
        keep = len(D) - rem
        # BUG FIX: the original sliced D[:-rem]; when rem == 0 (series length is
        # an exact power of two) that slice is EMPTY, crashing the transform.
        # Slicing by the kept length handles both cases.
        W = pywt.swt(D[:keep], outtype)  # Wavelet coefficients
        R = {}
        for level in range(lev):
            # Reconstructed signal at each level.
            # NOTE(review): assumes pywt.swt returns coefficient pairs with the
            # deepest level first (hence the reversed index) — confirm against
            # the PyWavelets docs for the installed version.
            R['L'+str(level).zfill(2)] = pywt.iswt([W[lev-level-1]], outtype)
        R = pd.DataFrame(R)
        temp = pd.DataFrame({'id': indata[indata.id == s].id.values[:keep],
                             't': indata[indata.id == s].t.values[:keep]})
        pieces.append(pd.concat([temp, R], axis=1))
    timeseries = pd.concat(pieces) if pieces else pd.DataFrame({})
    outfile = function+'_'+outtype+'_'+intype+'.pkl'
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Saving', outfile)
    timeseries.to_pickle(os.path.join('.', outdir, outfile))
    print(time.strftime("%m/%d/%Y"), time.strftime("%H:%M:%S"), 'Done')
    return timeseries
In [178]:
# Decompose with three wavelet families for comparison
coif1_average_linear_zalign = disco_decompose(average_linear_zalign,'average_linear_zalign','','coif1')
sym4_average_linear_zalign = disco_decompose(average_linear_zalign,'average_linear_zalign','','sym4')
db6_average_linear_zalign = disco_decompose(average_linear_zalign,'average_linear_zalign','','db6')
In [179]:
coif1_average_linear_zalign.head()  # Preview the Coiflet-1 decomposition
Out[179]:
In [180]:
sym4_average_linear_zalign.head()  # Preview the Symlet-4 decomposition
Out[180]:
In [181]:
db6_average_linear_zalign.head()  # Preview the Daubechies-6 decomposition
Out[181]:
Line chart of the decomposed timeseries. (Type: coif1)
In [2]:
# Load decomposed timeseries for subject
subject = '1a'
df = pd.read_pickle('disco_decompose_coif1_average_linear_zalign.pkl')
df = df[df.id==subject]  # restrict to the chosen subject
In [5]:
# Session start and end times (ms)
start, end = 55544580, 57725830
first, last = 0, -1  # plot everything except the final sample
T = df['t'][first:last].values      # Time
L01 = df['L01'][first:last].values  # Level 1 (finest plotted scale)
L08 = df['L08'][first:last].values  # Level 8
L16 = df['L16'][first:last].values  # Level 16 (coarsest plotted scale)
# (Levels 4 and 12 were considered but are omitted to keep the figure readable.)
with sns.axes_style("whitegrid"):
    fig, ax = plt.subplots()
    for series, label in ((L01, 'Level 1'), (L08, 'Level 8'), (L16, 'Level 16')):
        ax.plot(T, series, label=label)
    ax.axis([start, end, -40, 40])
    ax.ticklabel_format(style='plain', axis='x')
    ax.legend()
    ax.set_xlabel("Timepoint (ms)")
    ax.set_ylabel("Normalized Acceleration (m/s^2)")
    ax.set_title("Subject " + subject + ": Decomposed Accelerometer Data (Coiflet 1)")
    plt.show()