In [1]:
import time
import os
import sqlite3
import pandas as pd
import numpy as np
import scipy as sp
Extract data from accelerometer app CSV files.
In [2]:
def _clock_to_msec(clock):
    ''' Converts an 'HH:MM:SS:MS' string to milliseconds from 00:00:00:000 '''
    parts = [int(num) for num in clock.split(':')]
    return parts[0]*60*60*1000 + parts[1]*60*1000 + parts[2]*1000 + parts[3]

def disco_parser(inputdir,outputdir,startstring,endstring):
    ''' Extracts data from accelerometer app CSV files
    inputdir = Input data directory
    outputdir = Output data directory (timeseries)
    startstring = Event start time (HH:MM:SS:MS)
    endstring = Event end time (HH:MM:SS:MS)
    Returns: timeseries = Subjects' data (t,x,y,z,id), one row per sample.
    Each file contributes the samples from the last time point at or before
    the start time up to the first time point at or after the end time.
    Files whose recording does not span both times are skipped.
    Side effect: pickles timeseries to <outputdir>/disco_parser.pkl '''
    function = 'disco_parser'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)
    # Event start and end times in milliseconds
    starttime = _clock_to_msec(startstring)
    endtime = _clock_to_msec(endstring)
    sourcedir = os.path.join('.',inputdir)
    frames = [] # One trimmed DataFrame per subject; concatenated once after the loop
    # Sorted so that the row order does not depend on the file system's listing order
    for file in sorted(os.listdir(sourcedir)):
        path = os.path.join(sourcedir,file)
        # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints, .DS_Store)
        if file.startswith('.') or not os.path.isfile(path):
            continue
        print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),file)
        # Each row is 'HH:MM:SS:MS;x;y;z'; the regex separator splits on both ':' and ';'
        temp = pd.read_csv(path,
                           skiprows=[0],names=['h','m','s','ms','x','y','z'],
                           sep=':|;',engine='python')
        millisecond = (np.asarray(temp.h)*60*60*1000
                       + np.asarray(temp.m)*60*1000
                       + np.asarray(temp.s)*1000
                       + np.asarray(temp.ms))
        try:
            startindex = np.where(millisecond <= starttime)[0][-1] # Last timepoint before event start time
            endindex = np.where(millisecond >= endtime)[0][0] # First timepoint after event end time
        except IndexError: # Time series not within music start and end times
            continue
        window = slice(startindex,endindex+1)
        t = millisecond[window]
        subject = os.path.splitext(file)[0] # File name without its extension
        frames.append(pd.DataFrame({'t':t,
                                    'x':temp.x.iloc[window],
                                    'y':temp.y.iloc[window],
                                    'z':temp.z.iloc[window],
                                    'id':[subject]*len(t)}))
    if frames:
        timeseries = pd.concat(frames)
    else: # No usable files: return an empty frame that still has the expected columns
        timeseries = pd.DataFrame(columns=['t','x','y','z','id'])
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',function+'.pkl')
    timeseries.to_pickle(os.path.join('.',outputdir,function+'.pkl')) # To re-load: timeseries = pd.read_pickle(file_name)
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')
    return timeseries
In [3]:
# Parse the files in 'testdata' between the event start and end times;
# the pickle is written to the current directory (outputdir='')
timeseries = disco_parser(inputdir='testdata',outputdir='',
                          startstring='15:25:44:580',endstring='16:02:05:830')
In [4]:
# Preview the first rows of the parsed accelerometer data
timeseries.head()
Out[4]:
Create a SQLite database for the unprocessed accelerometer data.
In [5]:
def get_db(dbfile):
    '''Open and return a sqlite3 connection to dbfile (joined onto '.').'''
    dbpath = os.path.join('.',dbfile)
    return sqlite3.connect(dbpath)
def init_db(dbfile,schema):
    '''Create the database.
    dbfile = Database file name, opened through get_db
    schema = SQL script executed against the new connection
    Returns: db = Open connection with the schema applied and committed
    (the caller is responsible for closing it).
    If the schema script fails, the connection is closed before the error
    is re-raised so that it is not leaked.'''
    db = get_db(dbfile)
    try:
        db.cursor().executescript(schema)
        db.commit()
    except Exception:
        db.close()
        raise
    return db
In [6]:
# Schema for the unprocessed samples: subject id, time (ms) and x/y/z readings
disco_schema = """
DROP TABLE IF EXISTS "unprocessed";
CREATE TABLE "unprocessed" (
"id" VARCHAR,
"t" FLOAT,
"x" FLOAT,
"y" FLOAT,
"z" FLOAT
);
"""
db = init_db("disco_parser.db",disco_schema) # Initialize the database
try:
    # init_db has just (re)created an empty "unprocessed" table, so append into it.
    # if_exists="replace" would drop that table again and discard the column types declared above.
    timeseries.to_sql("unprocessed",db,if_exists="append",index=False) # Populate the database
    db.commit() # Commit changes to the database
finally:
    db.close() # Close the connection to the database, even if populating it failed
Convert the condition times to milliseconds elapsed since 00:00:00:000.
In [7]:
def disco_millisecond(outputdir,fileID,events):
    ''' Turns event clock times into milliseconds counted from 00:00:00:000
    outputdir = Directory that receives the pickled result
    fileID = Tag appended to the pickle name ('conditions','wholesongs','songparts')
    events = Sequence of ('HH:MM:SS:MS','NAME') pairs
    Returns: eventtimes = DataFrame with one row per event (name,msec) '''
    function = 'disco_millisecond'
    picklename = function+'_'+fileID+'.pkl'

    def stamp():
        # Current date and clock time, used as the prefix of every log line
        return time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S")

    print(*stamp(),'Running',function,fileID)
    labels,offsets = [],[]
    for event in events:
        clock,label = event[0],event[1]
        print(*stamp(),label)
        parts = list(map(int,clock.split(':')))
        # Fold hours -> minutes -> seconds -> milliseconds
        offsets.append(((parts[0]*60 + parts[1])*60 + parts[2])*1000 + parts[3])
        labels.append(label)
    eventtimes = pd.DataFrame({'name':labels,'msec':offsets})
    print(*stamp(),'Saving',picklename)
    eventtimes.to_pickle(os.path.join('.',outputdir,picklename))
    print(*stamp(),'Done')
    return eventtimes
In [8]:
# Onset time of each condition, plus the end time of the session
eventtimes = disco_millisecond(
    outputdir='',
    fileID='conditions',
    events=[
        ('15:25:44:580','Non-Synchrony 1'),
        ('15:29:53:240','Synchrony 1 (Choreographed)'),
        ('15:33:37:380','Non-Synchrony 2'),
        ('15:36:51:310','Synchrony 2 (Anti-Phase)'),
        ('15:40:56:120','Non-Synchrony 3'),
        ('15:44:34:950','Synchrony 3 (Magnitude)'),
        ('15:47:48:610','Non-Synchrony 4'),
        ('15:50:59:640','Synchrony 4 (Delay)'),
        ('15:54:25:740','Non-Synchrony 5'),
        ('15:58:17:370','Synchrony 5 (Non-Choreographed)'),
        ('16:02:05:830','End Time'),
    ],
)
In [9]:
# Display the event names alongside their times in milliseconds
eventtimes
Out[9]:
Create a SQLite database for the condition times.
In [10]:
# Schema for the condition times: event name and its time in milliseconds
msec_schema = """
DROP TABLE IF EXISTS "conditions";
CREATE TABLE "conditions" (
"name" VARCHAR,
"msec" FLOAT
);
"""
db = init_db("disco_millisecond_conditions.db",msec_schema) # Initialize the database
try:
    # init_db has just (re)created an empty "conditions" table, so append into it.
    # if_exists="replace" would drop that table again and discard the column types declared above.
    eventtimes.to_sql("conditions",db,if_exists="append",index=False) # Populate the database
    db.commit() # Commit changes to the database
finally:
    db.close() # Close the connection to the database, even if populating it failed