Psych 45: Stroop stats



In [15]:

    
%matplotlib inline

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Global seaborn look for every figure below: 'ticks' style, 'poster' context,
# fonts scaled by 1.5.
sns.set(
    context='poster',
    style='ticks',
    font_scale=1.5,
)

Import data file



In [16]:

    
# Read the raw results and discard the 'when' column, which nothing below uses.
# The two timing columns are handled as strings here: leading/trailing ' ',
# 'm' and 's' characters are stripped, commas are removed, and the remainder
# is cast to float.
data = (
    pd.read_csv('./stroop_stats_2018.csv')
    .drop('when', axis=1)
    .assign(
        time_normal=lambda df: (
            df['time_normal'].str.strip(' ms').str.replace(',', '').astype(float)
        ),
        time_interfere=lambda df: (
            df['time_interfere'].str.strip(' ms').str.replace(',', '').astype(float)
        ),
    )
)

# Per-row difference: time_interfere minus time_normal.
data['time_diff'] = data['time_interfere'] - data['time_normal']

Remove outliers (response times 2 or more standard deviations above the mean, in either condition)



In [17]:

    
# Drop slow outliers in two sequential passes: first on time_interfere, then
# on time_normal. Each cutoff is mean + 2 * std of the rows still present, so
# the second cutoff is computed after the first pass has already removed rows.
# NOTE: this cell reassigns `data`, so re-running it trims the frame further.
for rt_col in ('time_interfere', 'time_normal'):
    max_rt = data[rt_col].mean() + 2 * data[rt_col].std()
    data = data.loc[data[rt_col] < max_rt]



In [18]:

    
# Preview the first five rows of the trimmed frame.
data.head(n=5)









    Out[18]:







  
    
      
      pct_correct
      time_normal
      time_interfere
      time_diff
    
  
  
    
      0
      100.00
      937.57
      1153.92
      216.35
    
    
      1
      98.31
      1117.21
      1319.99
      202.78
    
    
      3
      100.00
      790.58
      932.55
      141.97
    
    
      4
      96.67
      1074.92
      1282.81
      207.89
    
    
      5
      83.87
      555.85
      734.36
      178.51

Analyses

Summary stats



In [19]:

    
# Count, mean, std, min, quartiles and max for every numeric column.
data.describe(percentiles=[0.25, 0.5, 0.75])









    Out[19]:







  
    
      
      pct_correct
      time_normal
      time_interfere
      time_diff
    
  
  
    
      count
      116.000000
      116.000000
      116.000000
      116.000000
    
    
      mean
      96.432241
      1010.982500
      1261.611034
      250.628534
    
    
      std
      5.858936
      344.158317
      435.298976
      190.845231
    
    
      min
      60.000000
      462.330000
      476.640000
      -214.240000
    
    
      25%
      95.000000
      763.982500
      997.275000
      121.862500
    
    
      50%
      100.000000
      947.600000
      1185.915000
      220.070000
    
    
      75%
      100.000000
      1189.147500
      1438.257500
      332.082500
    
    
      max
      100.000000
      2242.220000
      2745.000000
      854.350000

What is the distribution of overall accuracy?



In [20]:

    
# Distribution of pct_correct, with a rug mark per observation.
g = sns.distplot(
    data['pct_correct'],
    color='dodgerblue',
    rug=True,
)
g.set_xlabel('% correct')
sns.despine(trim=True)

How does condition affect response time?



In [21]:

    
# Reshape to long format: one row per (participant row, condition) with the
# response time in 'value' and pct_correct carried along as the id column.
# Only the two timing columns are melted, so time_diff never enters the long
# frame and no post-hoc row filter is needed. Building the frame in one step
# also avoids assigning into a filtered slice, which is what triggered
# pandas' SettingWithCopyWarning in the earlier filter-then-assign version.
data_long = pd.melt(
    data,
    id_vars=['pct_correct'],
    value_vars=['time_normal', 'time_interfere'],
)

# Relabel the conditions with the names used on the plot axis.
data_long['variable'] = data_long['variable'].replace(
    {'time_normal': 'congruent', 'time_interfere': 'incongruent'}
)



In [22]:

    
# Point plot of RT by condition with 95% confidence intervals.
# `sns.factorplot` was renamed to `sns.catplot` (seaborn 0.9) and is no longer
# available in current releases. catplot defaults to kind='strip', so
# kind='point' is passed explicitly to keep the figure factorplot produced.
# `ci=95` is kept (rather than the newer `errorbar=`) so the call also works
# on older seaborn versions.
g = sns.catplot(x='variable', y='value', kind='point',
                aspect=1.5, ci=95,
                data=data_long, palette='Set2')
g.set_ylabels('RT (ms)')
g.set_xlabels('condition')
# Limit the number of tick intervals on the current axes.
plt.locator_params(nbins=5)

What is the distribution of RTs for incongruent vs. congruent trials?

How much longer does it take to respond to an incongruent vs. a congruent trial?



In [23]:

    
# Distribution of the per-row RT difference, drawn vertically so the
# difference runs along the y-axis, with a rug mark per observation.
g = sns.distplot(
    data['time_diff'],
    color='mediumpurple',
    rug=True,
    vertical=True,
)
g.set_ylabel('RT for incongruent > congruent trials (ms)')
# Dashed reference line at a difference of zero, drawn from x=0 to x=0.003.
g.hlines(y=0, xmin=0, xmax=.003, linestyles='dashed')
sns.despine(trim=True)



In [ ]:



In [ ]:

	pct_correct	time_normal	time_interfere	time_diff
0	100.00	937.57	1153.92	216.35
1	98.31	1117.21	1319.99	202.78
3	100.00	790.58	932.55	141.97
4	96.67	1074.92	1282.81	207.89
5	83.87	555.85	734.36	178.51

	pct_correct	time_normal	time_interfere	time_diff
count	116.000000	116.000000	116.000000	116.000000
mean	96.432241	1010.982500	1261.611034	250.628534
std	5.858936	344.158317	435.298976	190.845231
min	60.000000	462.330000	476.640000	-214.240000
25%	95.000000	763.982500	997.275000	121.862500
50%	100.000000	947.600000	1185.915000	220.070000
75%	100.000000	1189.147500	1438.257500	332.082500
max	100.000000	2242.220000	2745.000000	854.350000