Exploratory Response Times

Looking at spectrograms of different triggers with different response times. My hypothesis is that there is some difference in power depending on how the long it takes for the patient to vocalize a response. This would be related to sort of inherent memory vs. actively searching for the memory.

The different response times can all be categorized by looking at the events struct. Split it correspondingly by different probe words, targetwords, or word match pairs and analyze the distribution.

I will determine if the distributions of response times for different probewords are the same, and for different unique word match pairs.

Things to Change:

1. Patient dir if you analyze a new patient
``````

In [1]:

# Import Necessary Libraries
import numpy as np
import os, csv, json
import math
import random

import itertools
import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt

import scipy.io
from sklearn.decomposition import PCA

# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')

%matplotlib inline

``````
``````

In [2]:

######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/freq_probeTovocal_binned/'
files = []

for file in os.listdir(filedir):
if file.endswith('.mat'):
files.append(file)

######## Load in EVENTS struct to find correct events
eventsDir = '../NIH034/behavioral/paRemap/' + 'events.mat'

events = events['events']

# print number of incorrect events and which words they belonged to
incorrectIndices = events['isCorrect'] == 0
incorrectEvents = events[incorrectIndices]
incorrectWords = []
wordList = {}
for i in range(0, len(incorrectEvents)):
incorrectWords.append(incorrectEvents['probeWord'][i][0])

for word in np.unique(incorrectEvents['probeWord']):
wordList[str(word)] = sum(incorrectWords == word)

print "There were ",len(incorrectEvents), " number of incorrect events."
print "The list of incorrect probe words: \n", wordList
#
# get only correct events
correctIndices = events['isCorrect'] == 1
events = events[correctIndices]

print "\nThis is the length of the events struct with only correct responses: ", len(events)
print "There are ", len(files), " files inside our directory"
print "This should match the number of channels."

``````
``````

There were  49  number of incorrect events.
The list of incorrect probe words:
{"[u'PANTS']": 7, "[u'JUICE']": 8, "[u'BRICK']": 12, "[u'CLOCK']": 13, "[u'GLASS']": 9}

This is the length of the events struct with only correct responses:  1431
There are  96  files inside our directory
This should match the number of channels.

``````

Extract Important Events Information

Response Times, etc.

``````

In [3]:

# convert responsetime into a list
responseTimes = []
for i in range(0, len(events)):
responseTimes.append(events['responseTime'][i][0][0])
#     print events['responseTime']
#     break
responseTimes = np.array(responseTimes)

# create a dictionary of response times for each probe word
wordResponseTimes = {}
for word in np.unique(incorrectEvents['probeWord']):
wordIndices = events['probeWord'] == word
print events.dtype

``````
``````

[('experiment', 'O'), ('subject', 'O'), ('sessionName', 'O'), ('sessionNum', 'O'), ('type', 'O'), ('msoffset', 'O'), ('mstime', 'O'), ('probeWord', 'O'), ('targetWord', 'O'), ('isCorrect', 'O'), ('blocknumber', 'O'), ('miniblocknumber', 'O'), ('matchOnTime', 'O'), ('probeOffTime', 'O'), ('fixationOnTime', 'O'), ('fixationOffTime', 'O'), ('responseTime', 'O'), ('vocalizedWord', 'O'), ('eegfile', 'O'), ('eegoffset', 'O')]

``````
``````

In [4]:

################## LOOPING THROUGH EACH CHANNEL ##################
feature_dict = {} # the dict to hold the feature matrix for each channel

for f in range(0, len(files)):
#################### Set up data from the channel's mat file ####################
# Go through each .mat file
mat_file = filedir + files[f]

data = data['data']

## 01: reformat unique trigger types
uniqueTrigTypes = data['uniqueTrigType'][0][0][0]
buff = []
for trig in uniqueTrigTypes:
buff.append(str(trig[0]))
uniqueTrigTypes = buff

## 02: reformat trigger types
trigTypes = data['trigType'][0][0][0]
buff = []
for trig in trigTypes:
buff.append(str(trig[0]))
trigTypes = buff

## 03: get channel number
chanNum = data['chanNum'][0][0][0][0]

## 04: get channel string
chanStr = data['chanStr'][0][0][0]

## 05: get power matrix Z is a #events X #freq. bands
matrix = data['powerMatZ'][0][0]

## 06: get freq band ticks and ylabels
freqBandYtick = data['freqBandYtick'][0][0][0]
freqBandYlabel = data['freqBandYlabel'][0][0][0]
buff = []
for freq in freqBandYlabel:
buff.append(str(freq[0]))
freqBandYlabel = buff

#################### Getting those events and the corresonding averaged powermat  ####################
## Get events of interest
TRIGGER_TYPES = uniqueTrigTypes
probeWords = events['probeWord']
targetWords = events['targetWord']

# number of frequency bins
num_freqs = len(freqBandYtick) - 1
# total number of "data centers"
#     num_features = len(TRIGGER_TYPES)*len(tempTargets)
features = {}

for i in range(0,len(TRIGGER_TYPES)): # LOOP THRU EACH PROBEWORD
current_trig = TRIGGER_TYPES[i]

## 01: get indices of the current trigger and get those events
tempInd = events['probeWord'] == current_trig
tempEvents = events[tempInd]
tempTargets = np.unique(tempEvents['targetWord'])

## 02: go through each target word for this probeword
for j in range(0, len(tempTargets)):
targetWord = tempTargets[j][0] # set target word

# get the indices of the events we want probe/target match
eventInd = events['probeWord'] == current_trig
eventInd2 = events['targetWord'] == targetWord
eventInd = np.array([any(tup) for tup in zip(eventInd, eventInd2)])

# get the matrix we want and average across all events
thisMat = np.mean(matrix[eventInd,:], axis=0)
# -> a 7x1 vector that represents this match for this channel

feature_key = str(current_trig) + '_' + str(targetWord)

features[feature_key] = thisMat
# clear vars
eventInd2 = 0

# turn features into np array and append to a dict of the features
#     features = np.array(features)
feature_dict[str(f+1)] = features

print "The final feature dictionary will have features from the "
print "following channels: ", sorted(feature_dict.keys())
print "Each channel has these features: ", feature_dict['2'].keys()
print "With shape of: ", feature_dict['2']['JUICE_GLASS'].shape

``````
``````

The final feature dictionary will have features from the
following channels:  ['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '8', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '9', '90', '91', '92', '93', '94', '95', '96']
Each channel has these features:  ['JUICE_GLASS', 'BRICK_JUICE', 'CLOCK_GLASS', 'PANTS_GLASS', 'GLASS_JUICE', 'BRICK_CLOCK', 'GLASS_PANTS', 'PANTS_BRICK', 'GLASS_CLOCK', 'JUICE_BRICK', 'BRICK_PANTS', 'CLOCK_BRICK']
With shape of:  (7,)

``````
``````

In [31]:

# test = responseTimes != 0
print max(responseTimes)
print min(responseTimes)
print responseTimes
plt.hist(responseTimes)

``````
``````

3830.65771484
676.918457031
[ 1739.0793457   1396.83227539  2609.80737305 ...,  1144.66210938
1151.40600586  1184.86621094]

Out[31]:

(array([ 165.,  745.,  317.,  104.,   45.,   21.,   16.,   10.,    6.,    2.]),
array([  676.91845703,   992.29238281,  1307.66630859,  1623.04023438,
1938.41416016,  2253.78808594,  2569.16201172,  2884.5359375 ,
3199.90986328,  3515.28378906,  3830.65771484]),
<a list of 10 Patch objects>)

``````
``````

In [38]:

print wordResponseTimes.keys()
plt.figure(figsize=(10, 10))
i = 1
for key in wordResponseTimes.keys():
responseTime = wordResponseTimes[key]
plt.subplot(5, 1, i)
plt.hist(responseTime)
plt.title("For Probeword: " + key)
i += 1
plt.tight_layout()

``````
``````

['JUICE', 'GLASS', 'BRICK', 'PANTS', 'CLOCK']

``````
``````

In [19]:

print wordResponseTimes.keys()

percentile_responseTimes = {}

plt.figure(figsize=(10, 10))
i = 1
for key in wordResponseTimes.keys():
percentile = np.percentile(wordResponseTimes[key], 90)
responseTime = wordResponseTimes[key][wordResponseTimes[key] < percentile]
plt.subplot(5, 1, i)
plt.hist(responseTime)
plt.title("For Probeword: " + key)
i += 1

percentile_responseTimes[key] = responseTime
plt.tight_layout()

juice = percentile_responseTimes['JUICE']
glass = percentile_responseTimes['GLASS']
brick = percentile_responseTimes['BRICK']
pants = percentile_responseTimes['PANTS']
clock = percentile_responseTimes['CLOCK']

# Ks 1 sample test
print scipy.stats.ks_2samp(juice, glass)
print scipy.stats.ks_2samp(juice, brick)
print scipy.stats.ks_2samp(juice, pants)
print scipy.stats.ks_2samp(glass, pants)
print scipy.stats.ks_2samp(brick, pants)
print scipy.stats.ks_2samp(glass, clock)
print scipy.stats.ks_2samp(brick, glass)
print scipy.stats.ks_2samp(brick, clock)

``````
``````

['JUICE', 'GLASS', 'BRICK', 'PANTS', 'CLOCK']
Ks_2sampResult(statistic=0.11768219832735966, pvalue=0.050345319614906216)
Ks_2sampResult(statistic=0.17481466639951915, pvalue=0.0006041054830149018)
Ks_2sampResult(statistic=0.14450598232782313, pvalue=0.019004368003896797)
Ks_2sampResult(statistic=0.23887189942235815, pvalue=4.9520399098733666e-07)
Ks_2sampResult(statistic=0.10556157045985526, pvalue=0.1032780558121716)
Ks_2sampResult(statistic=0.1122004357298475, pvalue=0.079476030461080108)
Ks_2sampResult(statistic=0.25452419292999007, pvalue=1.068255129327557e-09)
Ks_2sampResult(statistic=0.33111070515162588, pvalue=1.3517785242495158e-12)

``````

Testing Differences in Response Time Distribution

Here, I use the nonparametric Kolmogorov-smirnov test statistic to determine whether the various response time distributions are the same, or not.

Should I run a quantile filter? Since there seems to be a decent amount of outliers in terms of response times

``````

In [56]:

print wordResponseTimes.keys()

for key in wordResponseTimes.keys():
print wordResponseTimes[key].shape

juice = wordResponseTimes['JUICE']
glass = wordResponseTimes['GLASS']
brick = wordResponseTimes['BRICK']
pants = wordResponseTimes['PANTS']
clock = wordResponseTimes['CLOCK']

# Ks 1 sample test
print scipy.stats.ks_2samp(juice, glass)
print scipy.stats.ks_2samp(juice, brick)
print scipy.stats.ks_2samp(juice, pants)
print scipy.stats.ks_2samp(glass, pants)
print scipy.stats.ks_2samp(brick, pants)
print scipy.stats.ks_2samp(glass, clock)
print scipy.stats.ks_2samp(brick, glass)
print scipy.stats.ks_2samp(brick, clock)

``````
``````

['JUICE', 'GLASS', 'BRICK', 'PANTS', 'CLOCK']
(242,)
(361,)
(358,)
(243,)
(227,)
Ks_2sampResult(statistic=0.10514869165083224, pvalue=0.075800257106005922)
Ks_2sampResult(statistic=0.15864074980377674, pvalue=0.0011924007999215126)
Ks_2sampResult(statistic=0.12985069550726119, pvalue=0.030415943669708533)
Ks_2sampResult(statistic=0.21411716425566846, pvalue=2.4710818977871132e-06)
Ks_2sampResult(statistic=0.095638779685955355, pvalue=0.1336845235499069)
Ks_2sampResult(statistic=0.099942645856468282, pvalue=0.11619386767953627)
Ks_2sampResult(statistic=0.22985499620854544, pvalue=7.8317130441076093e-09)
Ks_2sampResult(statistic=0.29809514434080675, pvalue=2.1998681227156155e-11)

``````

Testing Simulation For KS Test Statistic

Here, I demonstrate the power of the Kolmogorov-Smirnov Test Statistic with varying sample sizes.

Ideally we would have around 1000 samples per each distribution, but that is not feasible.

Other ways we could improve the power of this test would be to bootstrap our sample data to increase our overall sample size.

``````

In [48]:

# define number of subjects per class
np.random.seed(123456789)  # for reproducibility, set random seed

S = np.array((4, 6, 8, 10, 14, 18, 20, 26, 30, 40,
50, 60, 70, 80, 100, 120, 150, 200, 250,
300, 400, 500, 750, 1000, 1500, 2000,
3000, 5000))
alpha = 0.05
N = 50 # # samples at each iteration

``````
``````

In [51]:

pow_null = np.array((), dtype=np.dtype('float64'))
powks_null = np.array((), dtype=np.dtype('float64'))
powks2_null = np.array((), dtype=np.dtype('float64'))

# compute this statistic for various sizes of datasets
for s in S:
# compute this many times for each operating point to get average
kspval = np.array((), dtype=np.dtype('float64'))
ks2pval = np.array((), dtype=np.dtype('float64'))

for _ in itertools.repeat(None,N):
g0 = np.random.uniform(0, 1, s) # (null)
g1 = np.random.uniform(0, 1, s) # null

# compute Kolmogorov-test statistic on generated data
ks2test_stat = scipy.stats.ks_2samp(g0, g1)
ks2pval = np.append(pval, ks2test_stat.pvalue)

kstest_stat = scipy.stats.kstest(g0, 'uniform', args=(0,1))
kspval = np.append(pval, kstest_stat.pvalue)

# record average p value at operating point
powks_null = np.append(powks_null, np.sum(1.0*(kspval < alpha))/N)
powks2_null = np.append(powks2_null, np.sum(1.0*(ks2pval < alpha))/N)

``````
``````

/Users/adam2392/anaconda/lib/python2.7/site-packages/scipy/stats/morestats.py:2384: UserWarning: Warning: sample size too small for normal approximation.
warnings.warn("Warning: sample size too small for normal approximation.")

``````
``````

In [52]:

pow_alt = np.array((), dtype=np.dtype('float64'))
powks_alt = np.array((), dtype=np.dtype('float64'))
powks2_alt = np.array((), dtype=np.dtype('float64'))

# compute this statistic for various sizes of datasets
for s in S:

# compute this many times for each operating point to get average
kspval = np.array((), dtype=np.dtype('float64'))
ks2pval = np.array((), dtype=np.dtype('float64'))

for _ in itertools.repeat(None,N):
g0 = np.random.uniform(0.2, 0.9, s) # (null)
g1 = np.random.uniform(0, 1, s) # alternative

# compute Kolmogorov-test statistic on generated data
ks2test_stat = scipy.stats.ks_2samp(g0, g1)
ks2pval = np.append(pval, ks2test_stat.pvalue)

kstest_stat = scipy.stats.kstest(g0, 'uniform', args=(0,1))
kspval = np.append(pval, kstest_stat.pvalue)

# record average p value at operating point
powks_alt = np.append(powks_alt, np.sum(1.0*(kspval < alpha))/N)
powks2_alt = np.append(powks2_alt, np.sum(1.0*(ks2pval < alpha))/N)

``````
``````

In [59]:

# Plotting power vs. n on the null set
fig = plt.figure(figsize=(10,10))
plt.subplot(312)
plt.scatter(S, powks_null, hold=True, label='null')
plt.scatter(S, powks_alt, color='green', hold=True, label='alt')
plt.xscale('log')
plt.xlabel('number of samples')
plt.ylabel('power')
plt.title('Strength of Uniform Classification under With Ks one sample test')
plt.axhline(alpha, color='red', linestyle='--', label='alpha')
plt.axvline(350, color='black', linestyle='--', label='n of our data')
plt.axvline(227, color='black', linestyle='--', label='n of our data')
plt.legend(loc=5)
plt.show()

# Plotting power vs. n on the null set
fig = plt.figure(figsize=(10,10))
plt.subplot(313)
plt.scatter(S, powks2_null, hold=True, label='null')
plt.scatter(S, powks2_alt, color='green', hold=True, label='alt')
plt.xscale('log')
plt.xlabel('number of samples')
plt.ylabel('power')
plt.title('Strength of Uniform Classification under With Ks two sample test')
plt.axhline(alpha, color='red', linestyle='--', label='alpha')
plt.axvline(350, color='black', linestyle='--', label='n of our data')
plt.axvline(227, color='black', linestyle='--', label='n of our data')
plt.legend(loc=5)
plt.show()

``````
``````

``````
``````

In [ ]:

``````