Database


In [1]:
import time
import os

import sqlite3

import pandas as pd
import numpy as np
import scipy as sp

Unprocessed accelerometer data

Extract data from the accelerometer app CSV files and trim each recording to the event window defined by the start and end times.


In [2]:
def disco_parser(inputdir,outputdir,startstring,endstring):
    ''' Extracts data from accelerometer app CSV files    
    inputdir = Input data directory
    outputdir = Output data directory (timeseries)
    startstring = Event start time (HH:MM:SS:MS)
    endstring = Event end time (HH:MM:SS:MS) 
    Returns: timeseries = Subjects' data (id,t,x,y,z) ''' 

    function = 'disco_parser'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)
    
    # Event start and end times in milliseconds
    startnumber = [int(num) for num in startstring.split(':')]
    endnumber = [int(num) for num in endstring.split(':')]
    starttime = startnumber[0]*60*60*1000 + startnumber[1]*60*1000 + startnumber[2]*1000 + startnumber[3]
    endtime = endnumber[0]*60*60*1000 + endnumber[1]*60*1000 + endnumber[2]*1000 + endnumber[3]
    
    timeseries = pd.DataFrame({})
    # samplerateMS,samplerateHZ = {},{}
    for file in os.listdir(os.path.join('.',inputdir)):
        
        print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),file)
            
        temp = pd.read_csv(os.path.join('.',inputdir,file),
                           skiprows=[0],names=['h','m','s','ms','x','y','z'],
                           sep=':|;',engine='python')

        # Sample times in milliseconds from 00:00:00:000
        millisecond = (np.array(temp.h)*60*60*1000 + np.array(temp.m)*60*1000 +
                       np.array(temp.s)*1000 + np.array(temp.ms))

        try: 
            startindex = np.where(millisecond <= starttime)[0][-1] # Last timepoint at or before the event start time
            endindex = np.where(millisecond >= endtime)[0][0] # First timepoint at or after the event end time
        except IndexError: # Recording does not span the event start and end times
            continue
        
        trim = pd.DataFrame({'t':millisecond[startindex:endindex+1],
                             'x':temp.x[startindex:endindex+1],
                             'y':temp.y[startindex:endindex+1],
                             'z':temp.z[startindex:endindex+1],
                             'id':[file[:-4]]*(endindex-startindex+1)})
        
        timeseries = pd.concat([timeseries,trim])
        
        # Time between each sampling time point (ms)
        # difference = np.diff(millisecond[1:-1]) # Exclude first(startindex) and last(endindex) timepoints        
        # samplerateMS[file[:-4]] = np.mean(difference) # Average sampling rate for each subject (ms)
        # samplerateHZ[file[:-4]] = np.divide(1000,samplerateMS[file[:-4]]) # Average sampling rate for each subject (Hz)
        
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',function+'.pkl')
    timeseries.to_pickle(os.path.join('.',outputdir,function+'.pkl')) # To re-load: timeseries = pd.read_pickle(file_name)
        
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done') 
    return timeseries

In [3]:
timeseries = disco_parser('testdata','','15:25:44:580','16:02:05:830')


11/07/2016 13:19:41 Running disco_parser
11/07/2016 13:19:41 1a.csv
11/07/2016 13:19:44 1b.csv
11/07/2016 13:19:46 2a.csv
11/07/2016 13:19:48 2b.csv
11/07/2016 13:19:50 3a.csv
11/07/2016 13:19:52 3b.csv
11/07/2016 13:19:54 4a.csv
11/07/2016 13:19:56 4b.csv
11/07/2016 13:19:58 5b.csv
11/07/2016 13:20:00 6a.csv
11/07/2016 13:20:03 6b.csv
11/07/2016 13:20:05 Saving disco_parser.pkl
11/07/2016 13:20:07 Done

In [4]:
timeseries.head()


Out[4]:
       id         t      x      y      z
15837  1a  55544529 -0.833  0.151  0.054
15838  1a  55544606 -0.848  0.218  0.078
15839  1a  55544610 -0.844  0.300  0.074
15840  1a  55544612 -0.833  0.313  0.093
15841  1a  55544617 -0.835  0.261  0.132
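
The average sampling rate calculation is left commented out inside disco_parser above; it can also be estimated from the returned timeseries. A minimal sketch, assuming the timeseries DataFrame from In [3] is still in memory (the groupby approach here is an illustration, not part of the original pipeline):

# Approximate per-subject sampling rate from the trimmed time series
# (mirrors the commented-out samplerateMS/samplerateHZ code in disco_parser)
samplerateMS = timeseries.groupby('id')['t'].apply(lambda t: np.mean(np.diff(np.sort(t.values)))) # Mean gap between samples (ms)
samplerateHZ = 1000/samplerateMS # Mean sampling rate (Hz)
print(samplerateHZ)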

Create an SQL database for the unprocessed accelerometer data.


In [5]:
def get_db(dbfile):
    '''Connect to the SQLite database file.'''
    sqlite_db = sqlite3.connect(os.path.join('.',dbfile))
    return sqlite_db

def init_db(dbfile,schema):
    '''Create the database tables defined in the schema.'''
    db = get_db(dbfile)
    db.cursor().executescript(schema)
    db.commit()
    return db

In [6]:
disco_schema = """
DROP TABLE IF EXISTS "unprocessed";
CREATE TABLE "unprocessed" (
    "id" VARCHAR,
    "t" FLOAT,
    "x" FLOAT,
    "y" FLOAT,
    "z" FLOAT
);
"""

db = init_db("disco_parser.db",disco_schema) # Initialize the database
timeseries.to_sql("unprocessed",db,if_exists="replace",index=False) # Populate the database
# Note: if_exists="replace" drops and re-creates the table, so pandas (not the schema above)
# determines the stored column types

db.commit() # Commit changes to the database
db.close() # Close the connection to the database
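
To confirm that the table was populated, the data can be read back with a query. A minimal sketch, re-opening the disco_parser.db file created above (the per-subject count query is just an illustration):

db = get_db("disco_parser.db") # Re-open the database created above
check = pd.read_sql_query("SELECT id, COUNT(*) AS samples FROM unprocessed GROUP BY id", db)
print(check)
db.close()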

Synchrony condition times

Convert condition times to milliseconds from 00:00:00:000.
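
For example, the first condition time 15:25:44:580 becomes 15*60*60*1000 + 25*60*1000 + 44*1000 + 580 = 55,544,580 ms, which is the msec value in the first row of the output below.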


In [7]:
def disco_millisecond(outputdir,fileID,events):
    ''' Converts time to milliseconds from 00:00:00:000
    outputdir = Output data directory (timeMSEC,timeNAME)
    fileID = File identifier ('conditions','wholesongs','songparts')
    events = Event time and name strings ('HH:MM:SS:MS','NAME')
    Returns: eventtimes = Events' data (name,msec) '''
    
    function = 'disco_millisecond'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function,fileID)
    
    timeMSEC,timeNAME = [],[]
    for eventstring,eventname in events: # Each event is a ('HH:MM:SS:MS','NAME') tuple
        
        print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),eventname)
        
        timeNUM = [int(num) for num in eventstring.split(':')]
        timeMSEC.append(timeNUM[0]*60*60*1000 + timeNUM[1]*60*1000 + timeNUM[2]*1000 + timeNUM[3])
        timeNAME.append(eventname)
    
    eventtimes = pd.DataFrame({'name':timeNAME,'msec':timeMSEC})
    
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',function+'_'+fileID+'.pkl')
    eventtimes.to_pickle(os.path.join('.',outputdir,function+'_'+fileID+'.pkl'))
    
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done') 
    return eventtimes

In [8]:
eventtimes = disco_millisecond('','conditions',
   [('15:25:44:580','Non-Synchrony 1'),
    ('15:29:53:240','Synchrony 1 (Choreographed)'),
    ('15:33:37:380','Non-Synchrony 2'),
    ('15:36:51:310','Synchrony 2 (Anti-Phase)'),
    ('15:40:56:120','Non-Synchrony 3'),
    ('15:44:34:950','Synchrony 3 (Magnitude)'),
    ('15:47:48:610','Non-Synchrony 4'),
    ('15:50:59:640','Synchrony 4 (Delay)'),
    ('15:54:25:740','Non-Synchrony 5'),
    ('15:58:17:370','Synchrony 5 (Non-Choreographed)'),
    ('16:02:05:830','End Time')])


11/07/2016 13:20:21 Running disco_millisecond conditions
11/07/2016 13:20:21 Non-Synchrony 1
11/07/2016 13:20:21 Synchrony 1 (Choreographed)
11/07/2016 13:20:21 Non-Synchrony 2
11/07/2016 13:20:21 Synchrony 2 (Anti-Phase)
11/07/2016 13:20:21 Non-Synchrony 3
11/07/2016 13:20:21 Synchrony 3 (Magnitude)
11/07/2016 13:20:21 Non-Synchrony 4
11/07/2016 13:20:21 Synchrony 4 (Delay)
11/07/2016 13:20:21 Non-Synchrony 5
11/07/2016 13:20:21 Synchrony 5 (Non-Choreographed)
11/07/2016 13:20:21 End Time
11/07/2016 13:20:21 Saving disco_millisecond_conditions.pkl
11/07/2016 13:20:21 Done

In [9]:
eventtimes


Out[9]:
        msec                             name
0   55544580                  Non-Synchrony 1
1   55793240      Synchrony 1 (Choreographed)
2   56017380                  Non-Synchrony 2
3   56211310         Synchrony 2 (Anti-Phase)
4   56456120                  Non-Synchrony 3
5   56674950          Synchrony 3 (Magnitude)
6   56868610                  Non-Synchrony 4
7   57059640              Synchrony 4 (Delay)
8   57265740                  Non-Synchrony 5
9   57497370  Synchrony 5 (Non-Choreographed)
10  57725830                         End Time

Create an SQL database for the condition times.


In [10]:
msec_schema = """
DROP TABLE IF EXISTS "conditions";
CREATE TABLE "conditions" (
    "name" VARCHAR,
    "msec" FLOAT
);
"""

db = init_db("disco_millisecond_conditions.db",msec_schema)
eventtimes.to_sql("conditions",db,if_exists="replace",index=False)

db.commit()
db.close()
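
With both databases in place, each accelerometer sample can be labelled with the condition it falls in. A minimal sketch, assuming the two .db files created above; the interval lookup with pandas.cut is an illustration, not part of the original pipeline:

# Label each accelerometer sample with the condition whose time window contains it
db1 = get_db("disco_parser.db")
db2 = get_db("disco_millisecond_conditions.db")
unprocessed = pd.read_sql_query("SELECT * FROM unprocessed", db1)
conditions = pd.read_sql_query("SELECT * FROM conditions ORDER BY msec", db2)
db1.close()
db2.close()

# Each condition runs from its start time up to the next event's start time;
# 'End Time' only closes the final window, so it supplies a boundary but no label
unprocessed['condition'] = pd.cut(unprocessed['t'], bins=conditions['msec'],
                                  labels=list(conditions['name'][:-1]), right=False)
print(unprocessed.groupby('condition').size())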