In [1]:
%matplotlib inline
import time
import os
import sys
import sqlite3
import pandas as pd
import numpy as np
import scipy as sp
import statsmodels.formula.api as smf
import scipy.io
import scipy.stats # used below via sp.stats (pearsonr, sem)
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
plotly.offline.init_notebook_mode()
Calculate the mean synchrony timeseries across the middle frequency bands for each pipeline.
In [2]:
def disco_matimport(indir,outdir,startlev,endlev):
    ''' Import MATLAB variables into a pandas table
    Returns: sync = Mean of synchrony values across frequency bands for each pipeline '''
    function = 'disco_matimport'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function,indir)
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of files =',len(os.listdir(os.path.join('.',indir))))
    i = 0
    sync = pd.DataFrame({})
    for file in os.listdir(os.path.join('.',indir)):
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        syncvar = file.split('_')[1].upper() # MATLAB variable name (synchrony measure)
        matvar = scipy.io.loadmat(os.path.join('.',indir,file),variable_names=syncvar)
        avgvar = np.mean(matvar[syncvar][startlev:endlev],0) # Mean of synchrony timeseries across frequency bands
        # Store information in columns (column name = filename minus its first token and '.mat')
        temp = pd.DataFrame({file[len(file.split('_')[0])+len('_'):-4]:avgvar})
        sync = pd.concat([sync,temp],axis=1)
        # Store information in rows
        # avgvar = pd.DataFrame.transpose(pd.DataFrame(avgvar))
        # pipeline = pd.DataFrame({'pipeline':[file[:-4]]})
        # temp = pd.concat([pipeline,avgvar],axis=1)
        # sync = pd.concat([sync,temp],ignore_index=True)
    print(' ')
    outfile = function+'_'+indir+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    sync.to_pickle(os.path.join('.',outdir,outfile))
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')
    return sync
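The column names come straight from the filenames: file.split('_')[1] picks out the synchrony measure (which is also the name of the MATLAB variable to load), and the slice drops the first filename token plus the '.mat' extension. A minimal sketch with a hypothetical filename (the actual prefix token depends on how the .mat files were exported):
In [ ]:
# Hypothetical filename; assumed pattern: '<prefix>_<sync>_<wavelet+moments>_<down>_<interp>_<comb>.mat'
file = 'pilot_ips_coif1_decimate_cubic_vector.mat'
syncvar = file.split('_')[1].upper() # 'IPS' = MATLAB variable name
column = file[len(file.split('_')[0])+len('_'):-4] # 'ips_coif1_decimate_cubic_vector'
print(syncvar,column)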
In [18]:
VAL = disco_matimport('validity','',5,10)
In [19]:
VAL.head()
Out[19]:
Create a database with the averaged synchrony timeseries.
In [26]:
analysis = 'validity'
columns = list(VAL)
dframe = VAL
schema = 'DROP TABLE IF EXISTS "' + analysis + '"; CREATE TABLE "' + analysis + '" ('
for i in range(len(columns)):
    schema += '"' + columns[i] + '" FLOAT'
    if i < len(columns)-1:
        schema += ', '
schema += ');'
dbase = sqlite3.connect('disco_matimport_' + analysis + '.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql(analysis,dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()
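For reference, the loop above generates a schema of the following form (shown here for two made-up columns):
In [ ]:
# Schema generated for two made-up pipeline columns
cols = ['cpm_coif1_decimate_cubic_vector','ips_db2_mean_linear_xalign']
s = 'DROP TABLE IF EXISTS "validity"; CREATE TABLE "validity" ('
for i in range(len(cols)):
    s += '"' + cols[i] + '" FLOAT'
    if i < len(cols)-1:
        s += ', '
s += ');'
print(s)
# DROP TABLE IF EXISTS "validity"; CREATE TABLE "validity" ("cpm_coif1_decimate_cubic_vector" FLOAT, "ips_db2_mean_linear_xalign" FLOAT);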
Load pipeline synchrony database.
In [2]:
new_db = sqlite3.connect('disco_matimport_validity.db')
Load condition times database.
In [3]:
time_db = sqlite3.connect('disco_millisecond_conditions.db')
Compute correlations of the synchrony timecourses from the different pipelines with the condition timecourse.
In [41]:
def disco_validity(indata,outdir,timedata,samplerate):
    ''' Correlation test of synchrony timecourses from different
    pipelines against the condition timecourse from the pilot experiment
    indata = Database of synchrony measures files
    outdir = Output data directory
    timedata = Database containing condition times in milliseconds
    samplerate = Sampling rate in ms
    (e.g., ceil(mean of actual rate across Ss) = ceil(15.4120) = 16 ms)
    Returns: RHO = Correlations with condition timecourse '''
    function = 'disco_validity'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)
    # Condition times in milliseconds
    query = "SELECT msec FROM conditions;"
    timeMSEC = pd.read_sql_query(query,timedata)
    timeMSEC = timeMSEC['msec'].values
    # List of preprocessing pipelines (columns)
    query = "PRAGMA table_info(validity);"
    preproc = [elem[1] for elem in indata.cursor().execute(query)]
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of pipelines =',len(preproc))
    i = 0
    RHO = pd.DataFrame(columns=('preproc','R','P'))
    for P in preproc:
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        query = "SELECT " + P + " FROM validity;"
        meanSYNC = pd.read_sql_query(query,indata)
        meanSYNC = meanSYNC[pd.notnull(meanSYNC[P])] # Remove NaN value at last element (CPM)
        meanSYNC = meanSYNC[P].values
        # Timecourse defining synchrony (1) and non-synchrony (0) conditions
        condSYNC = np.zeros(len(meanSYNC))
        for t in range(1,len(timeMSEC),2): # For each synchrony condition (2 timepoints)
            first = int(np.ceil((timeMSEC[t]-timeMSEC[0])/samplerate)) # First timepoint
            last = int(np.ceil((timeMSEC[t+1]-timeMSEC[0])/samplerate)) # Last timepoint
            if last > len(condSYNC):
                last = len(condSYNC)
            condSYNC[first:last] = np.ones(last-first)
        # Correlation test of condition and synchrony timecourses
        result = sp.stats.pearsonr(condSYNC,meanSYNC)
        RHO.loc[RHO.shape[0]] = [P,result[0],result[1]]
    print(' ')
    outfile = function+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    RHO.to_pickle(os.path.join('.',outdir,outfile))
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')
    return RHO
In [42]:
RHO = disco_validity(new_db,'',time_db,int(np.ceil(15.4120)))
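As a sanity check on the millisecond-to-sample conversion, here is the condition-timecourse construction on made-up times: one synchrony block from 5,000 to 10,000 ms, sampled at 16 ms:
In [ ]:
# Toy condition times (ms): recording onset, synchrony block start, synchrony block end
timeMSEC = np.array([0,5000,10000])
samplerate = 16
condSYNC = np.zeros(700)
for t in range(1,len(timeMSEC),2):
    first = int(np.ceil((timeMSEC[t]-timeMSEC[0])/samplerate)) # ceil(312.5) = 313
    last = min(int(np.ceil((timeMSEC[t+1]-timeMSEC[0])/samplerate)),len(condSYNC)) # 625
    condSYNC[first:last] = 1
print(first,last,condSYNC.sum()) # 313 625 312.0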
In [44]:
RHO.head()
Out[44]:
Save the correlations in a database.
In [45]:
dframe = RHO
schema = """
DROP TABLE IF EXISTS "RHO";
CREATE TABLE "RHO" (
"preproc" VARCHAR,
"R" FLOAT,
"P" FLOAT
);
"""
dbase = sqlite3.connect('disco_validity.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("RHO",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()
Group correlations by factor levels (i.e., pipeline parameters) using Pandas.
In [4]:
# Load discriminability database
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO;"
df = pd.read_sql_query(query,db)
db.close()
df.head()
Out[4]:
In [5]:
# Replace vanishing moments with filter lengths
df['preproc'] = df['preproc'].str.replace('1|2','_short',regex=True) # regex=True: older pandas defaulted to regex, pandas >= 2.0 does not
df['preproc'] = df['preproc'].str.replace('3|4','_medium',regex=True)
df['preproc'] = df['preproc'].str.replace('5|6','_long',regex=True)
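A quick check of the renaming on one hypothetical pipeline name (the digit is the vanishing-moment code that maps to a filter length):
In [ ]:
print(pd.Series(['cpm_coif1_decimate_cubic_vector']).str.replace('1|2','_short',regex=True)[0])
# cpm_coif_short_decimate_cubic_vector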
In [6]:
df.head()
Out[6]:
In [7]:
df.tail()
Out[7]:
In [8]:
# Retrieve correlations for each factor level
paramstr = ['cpm','ips', # Synchrony measure
'coif','db','sym', # Wavelet family
'short','medium','long', # Filter length
'decimate','mean', # Downsampling
'cubic','linear','nearest', # Interpolation
'vector','xalign','yalign','zalign'] # Combination
paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    c = df[df['preproc'].str.contains(p)].R.values
    paramdf.loc[paramdf.shape[0]] = [p,c]
In [9]:
paramdf
Out[9]:
Group correlations by factor levels (i.e., pipeline parameters) using SQLite3.
In [2]:
# Load discriminability database
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_coif%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_db%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_sym%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
In [3]:
# Replace vanishing moments with filter lengths
query = "UPDATE RHO SET preproc = REPLACE(preproc,'1','_short');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'2','_short');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'3','_medium');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'4','_medium');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'5','_long');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'6','_long');"; db.cursor().execute(query)
db.commit()
In [4]:
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_coif%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_db%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_sym%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
In [5]:
# Retrieve correlations for each factor level
paramstr = ['cpm','ips', # Synchrony measure
'coif','db','sym', # Wavelet family
'short','medium','long', # Filter length
'decimate','mean', # Downsampling
'cubic','linear','nearest', # Interpolation
'vector','xalign','yalign','zalign'] # Combination
paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    query = "SELECT R FROM RHO WHERE preproc LIKE '%" + p + "%';"
    c = pd.read_sql_query(query,db).R.values
    paramdf.loc[paramdf.shape[0]] = [p,c]
In [6]:
paramdf
Out[6]:
In [7]:
# Save table and close database
paramdf.to_pickle('disco_validity_params.pkl')
db.close()
Compare correlations from different preprocessing parameters.
In [2]:
# Load pipeline correlations
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO;"
df = pd.read_sql_query(query,db)
db.close()
In [3]:
# Split pipeline names into factor levels or parameters
params = pd.DataFrame(df['preproc'].str.split('_').tolist(),columns=['sync','wave','filt','down','interp','comb'])
df = pd.concat([df,params],axis=1)
df.head()
Out[3]:
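After the renaming, every pipeline name has exactly six underscore-separated tokens, one per factor, which is what makes the split above line up with the six column labels:
In [ ]:
print('cpm_coif_short_decimate_cubic_vector'.split('_'))
# ['cpm', 'coif', 'short', 'decimate', 'cubic', 'vector']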
In [4]:
# Convert correlations to Fisher's z
Fz = pd.DataFrame(dict(Fz=np.arctanh(df['R']).values))
df = pd.concat([df,Fz],axis=1)
df.head()
Out[4]:
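Fisher's z is z = arctanh(r) = 0.5*ln((1+r)/(1-r)); it stretches the bounded correlation scale so averaging and linear modeling behave better, and tanh maps results back to r. A quick round trip:
In [ ]:
r = 0.5
z = np.arctanh(r) # 0.5493...
print(z,np.tanh(z)) # tanh maps z back to r = 0.5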
In [5]:
# Multiple regression of transformed correlations on categorical preprocessing parameters
est = smf.ols(formula='Fz~C(sync)+C(wave)+C(filt)+C(down)+C(interp)+C(comb)',data=df).fit()
est.summary().tables[1]
Out[5]:
In [6]:
# Interaction between significant factors: synchrony measure and axes combination
est = smf.ols(formula='Fz~C(sync)*C(comb)',data=df).fit()
est.summary().tables[1]
Out[6]:
In [10]:
# Bar chart of interaction
combolist = ['vector','xalign','yalign','zalign']
comboaxis = np.arange(len(combolist))+1
cpm = [np.tanh(np.mean(df[(df.sync=='cpm')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
ips = [np.tanh(np.mean(df[(df.sync=='ips')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
cpm_e = [np.tanh(sp.stats.sem(df[(df.sync=='cpm')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
ips_e = [np.tanh(sp.stats.sem(df[(df.sync=='ips')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
with sns.axes_style("whitegrid"):
    plt.figure(); plt.clf()
    ax1 = plt.subplot(121); ax1.bar(comboaxis,cpm,yerr=cpm_e)
    ax2 = plt.subplot(122); ax2.bar(comboaxis,ips,yerr=ips_e)
    val="Discriminability (Pearson's r)"; ax1.set_ylabel(val) # ax2.set_ylabel(val)
    val="Axes Combination"; ax1.set_xlabel(val); ax2.set_xlabel(val)
    val="Synchrony Measure: CPM"; ax1.set_title(val)
    val="Synchrony Measure: IPS"; ax2.set_title(val)
    val=('vector','xalign','yalign','zalign'); ax1.set_xticklabels(val); ax2.set_xticklabels(val)
    val=comboaxis+.5; ax1.set_xticks(val); ax2.set_xticks(val)
    val=[.75,5,-.6,.6]; ax1.axis(val); ax2.axis(val)
Box and whisker plots of parameter correlations.
In [ ]:
paramname = {'cpm':'Synchrony: CPM','ips':'Synchrony: IPS',
'coif':'Wavelet: Coiflet','db':'Wavelet: Daubechies','sym':'Wavelet: Symlet',
'short':'Filter Length: Short','medium':'Filter Length: Medium','long':'Filter Length: Long',
'decimate':'Downsampling: Decimate','mean':'Downsampling: Average',
'cubic':'Interpolation: Cubic','linear':'Interpolation: Linear','nearest':'Interpolation: Nearest',
'vector':'Combination: Standard','xalign':'Combination: X-align',
'yalign':'Combination: Y-align','zalign':'Combination: Z-align'}
# Number of boxes (parameters)
N = paramdf.shape[0]
# Generate an array of rainbow colors
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0,360,num=N)]
# Each box is a dictionary containing the data, type, and color; Shows the mean for each box
data = [{'name':paramname[paramdf.loc[i][0]],
'y':paramdf.loc[i][1],
'type':'box',
'marker':{'color':c[i]},
'boxmean':True}
for i in range(N)]
# Format the layout
layout = {'xaxis':{'showticklabels':False},
'yaxis':{'title':"Discriminability (Pearson's r)",
'range':[-0.6,0.6],'tickformat':'.3f',
'zeroline':False},
'title':'Preprocessing and Analysis Pipelines: Parameter Discriminability'}
# Plot the data
fig = {'data':data,'layout':layout}
plotly.offline.iplot(fig)
Sort pipeline correlations from highest to lowest.
In [17]:
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO ORDER BY R DESC;"
df = pd.read_sql_query(query,db)
db.close()
df[0:25]
Out[17]:
Bar plots of highest pipeline correlations.
In [ ]:
N = 25 # Top 25 pipelines
pipename = list(df[0:N].preproc)
pipename = [p.replace('_',', ') for p in pipename]
pipename = [p.replace('mean','average') for p in pipename]
pipetext = ['']*N
pipetext[pipename.index('ips, coif, short, average, linear, zalign')] = 'Highest reliability'
pipecolor = ['rgb(158,202,225)']*N
pipecolor[pipename.index('ips, coif, short, average, linear, zalign')] = 'rgb(8,48,107)'
data = [plotly.graph_objs.Bar(
x = pipename,
y = list(df[0:N].R),
text = pipetext,
marker = dict(
color = pipecolor,
line = dict(
color = 'rgb(8,48,107)',
width = 1.5
)
),
opacity = 0.6
)]
# Format the layout
layout = {'xaxis':{'tickangle':315},
'yaxis':{'title':"Discriminability (Pearson's r)",
'range':[0,1],'tickformat':'.3f'},
'title':'Preprocessing and Analysis Pipelines: Highest Discriminability',
'showlegend':False,
'margin':{'b':225,'l':150}}
# Plot the data
fig = plotly.graph_objs.Figure(data=data,layout=layout)
plotly.offline.iplot(fig)
Calculate the mean synchrony timeseries across the middle frequency bands for each pipeline and each pair of devices.
In [3]:
REL = disco_matimport('reliability','',5,10)
Create a database with the averaged synchrony timeseries for each pair of devices.
In [4]:
analysis = 'reliability'
columns = list(REL)
dframe = REL
schema = 'DROP TABLE IF EXISTS "' + analysis + '"; CREATE TABLE "' + analysis + '" ('
for i in range(len(columns)):
    schema += '"' + columns[i] + '" FLOAT'
    if i < len(columns)-1:
        schema += ', '
schema += ');'
dbase = sqlite3.connect('disco_matimport_' + analysis + '.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql(analysis,dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()
In [5]:
REL.head()
Out[5]:
Load pipeline synchrony database.
In [3]:
new_db = sqlite3.connect('disco_matimport_reliability.db')
Compute intraclass correlations of synchrony timecourses from the paired devices.
In [4]:
def disco_reliability(indata,outdir):
    ''' Reliability test of synchrony timecourses from different pipelines
    indata = Database of synchrony measures files
    outdir = Output data directory
    Returns: REL = Intraclass correlations between two devices '''
    function = 'disco_reliability'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)
    # List of preprocessing pipelines (columns) from Group A
    query = "PRAGMA table_info(reliability);"
    preproc = [elem[1] for elem in indata.cursor().execute(query)]
    preproc = [elem for elem in preproc if 'groupA' in elem]
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of pipelines =',len(preproc))
    # Type of ICC: A-1, Case 3 [i.e., A-ICC(3,1)]
    # Absolute agreement among measurements; two-way mixed effects, single measures
    # Model 3 = Each subject is assessed by each rater, and the raters are the only raters of interest
    # Form 1 = Reliability calculated from a single measurement
    # Two-way random effects [A-1, Case 2, or A-ICC(2,1)] and A-1, Case 3
    # have equivalent calculations and differ only in interpretation
    i = 0
    REL = pd.DataFrame(columns=('preproc','R')) # p-values are added later from the MATLAB results (see the ICC table below)
    for P in preproc:
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        query = "SELECT " + P + " FROM reliability;" # Group A
        A = pd.read_sql_query(query,indata)
        A = A[pd.notnull(A[P])] # Remove NaN value at last element (CPM)
        x = A[P].values
        query = "SELECT " + P.replace("groupA","groupB") + " FROM reliability;" # Group B
        B = pd.read_sql_query(query,indata)
        B = B[pd.notnull(B[P.replace("groupA","groupB")])] # Remove NaN value at last element (CPM)
        y = B[P.replace("groupA","groupB")].values
        # Reliability test of synchrony timecourses
        # http://stats.stackexchange.com/questions/63368/intra-class-correlation-and-experimental-design
        Sx = sum(x); Sy = sum(y)
        Sxx = sum(x*x); Sxy = sum((x+y)**2)/2; Syy = sum(y*y)
        n = len(x)
        fact = ((Sx + Sy)**2)/(n*2)
        SS_tot = Sxx + Syy - fact
        SS_among = Sxy - fact
        SS_error = SS_tot - SS_among
        MS_error = SS_error/n
        MS_among = SS_among/(n-1)
        ICC = (MS_among - MS_error) / (MS_among + MS_error)
        REL.loc[REL.shape[0]] = [P,ICC]
    print(' ')
    outfile = function+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    REL.to_pickle(os.path.join('.',outdir,outfile))
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')
    return REL
In [5]:
REL = disco_reliability(new_db,'')
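As a check on the running-sum arithmetic inside disco_reliability, the same ICC can be computed from the usual sums of squares around the pair means and the grand mean; on made-up data the two routes agree:
In [ ]:
# Made-up paired timecourses
x = np.array([1.0,2.0,3.0,4.0]); y = np.array([1.1,1.9,3.2,3.8])
n = len(x)
# Sums-of-squares route: pair means (m), grand mean (g), k = 2 raters
m = (x+y)/2; g = m.mean()
SS_among = 2*np.sum((m-g)**2) # Between-pair SS
SS_error = np.sum((x-m)**2)+np.sum((y-m)**2) # Within-pair SS
print((SS_among/(n-1)-SS_error/n)/(SS_among/(n-1)+SS_error/n))
# Running-sum route, as in disco_reliability
Sx = x.sum(); Sy = y.sum(); fact = ((Sx+Sy)**2)/(n*2)
SS_a = np.sum((x+y)**2)/2 - fact
SS_e = (np.sum(x*x)+np.sum(y*y)-fact) - SS_a
print((SS_a/(n-1)-SS_e/n)/(SS_a/(n-1)+SS_e/n)) # Same value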
Save the correlations in a database.
In [9]:
dframe = REL
schema = """
DROP TABLE IF EXISTS "REL";
CREATE TABLE "REL" (
"preproc" VARCHAR,
"R" FLOAT,
"P" FLOAT
);
"""
dbase = sqlite3.connect('disco_reliability.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("REL",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()
Group correlations by factor levels (i.e., pipeline parameters) using Pandas.
In [8]:
# Pipeline names
valdb = sqlite3.connect('disco_validity.db')
query = "SELECT preproc FROM RHO;"
relpip = pd.read_sql_query(query,valdb)
valdb.close()
# ICCs and p-values
relmat = scipy.io.loadmat('matlabfiles/disco_reliability.mat',variable_names=['REL','preproc'])
relcor = pd.DataFrame({'R':pd.DataFrame(relmat['REL'])[0]}) # Column 0: ICC
relpvl = pd.DataFrame({'P':pd.DataFrame(relmat['REL'])[6]}) # Column 6: p-value
# Reliability database
dframe = pd.concat([relpip,relcor,relpvl],axis=1)
schema = """
DROP TABLE IF EXISTS "ICC";
CREATE TABLE "ICC" (
"preproc" VARCHAR,
"R" FLOAT,
"P" FLOAT
);
"""
dbase = sqlite3.connect('disco_reliability.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("ICC",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()
In [9]:
dframe.head()
Out[9]:
In [10]:
dframe.tail()
Out[10]:
In [11]:
# Retrieve reliabilities for each factor level
paramstr = ['cpm','ips', # Synchrony measure
'coif','db','sym', # Wavelet family
'short','medium','long', # Filter length
'decimate','mean', # Downsampling
'cubic','linear','nearest', # Interpolation
'vector','xalign','yalign','zalign'] # Combination
paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    c = dframe[dframe['preproc'].str.contains(p)].R.values
    paramdf.loc[paramdf.shape[0]] = [p,c]
In [12]:
paramdf
Out[12]:
In [13]:
# Save table
paramdf.to_pickle('disco_reliability_params.pkl')
Compare reliabilities from different preprocessing parameters.
In [3]:
# Load pipeline reliabilities
db = sqlite3.connect('disco_reliability.db')
query = "SELECT * FROM ICC;"
df = pd.read_sql_query(query,db)
db.close()
In [4]:
# Split pipeline names into factor levels or parameters
params = pd.DataFrame(df['preproc'].str.split('_').tolist(),columns=['sync','wave','filt','down','interp','comb'])
df = pd.concat([df,params],axis=1)
df.head()
Out[4]:
In [5]:
# Convert correlations to Fisher's z
Fz = pd.DataFrame(dict(Fz=np.arctanh(df['R']).values))
df = pd.concat([df,Fz],axis=1)
df.head()
Out[5]:
In [6]:
# Multiple regression of transformed correlations on categorical preprocessing parameters
est = smf.ols(formula='Fz~C(sync)+C(wave)+C(filt)+C(down)+C(interp)+C(comb)',data=df).fit()
est.summary().tables[1]
Out[6]:
Box and whisker plots of parameter reliabilities.
In [ ]:
paramname = {'cpm':'Synchrony: CPM','ips':'Synchrony: IPS',
'coif':'Wavelet: Coiflet','db':'Wavelet: Daubechies','sym':'Wavelet: Symlet',
'short':'Filter Length: Short','medium':'Filter Length: Medium','long':'Filter Length: Long',
'decimate':'Downsampling: Decimate','mean':'Downsampling: Average',
'cubic':'Interpolation: Cubic','linear':'Interpolation: Linear','nearest':'Interpolation: Nearest',
'vector':'Combination: Standard','xalign':'Combination: X-align',
'yalign':'Combination: Y-align','zalign':'Combination: Z-align'}
# Number of boxes (parameters)
N = paramdf.shape[0]
# Generate an array of rainbow colors
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0,360,num=N)]
# Each box is a dictionary containing the data, type, and color; Shows the mean for each box
data = [{'name':paramname[paramdf.loc[i][0]],
'y':paramdf.loc[i][1],
'type':'box',
'marker':{'color':c[i]},
'boxmean':True}
for i in range(N)]
# Format the layout
layout = {'xaxis':{'showticklabels':False},
'yaxis':{'title':"Reliability (Intraclass correlation)",
'range':[-0.6,0.8],'tickformat':'.3f',
'zeroline':False},
'title':'Preprocessing and Analysis Pipelines: Parameter Reliability'}
# Plot the data
fig = {'data':data,'layout':layout}
plotly.offline.iplot(fig)
Sort pipeline reliabilities from highest to lowest.
In [26]:
relsort = dframe.sort_values('R',ascending=False)
relsort[0:25]
Out[26]:
Bar plots of highest pipeline reliabilities.
In [ ]:
N = 25 # Top 25 pipelines
pipename = list(relsort[0:N].preproc)
pipename = [p.replace('_',', ') for p in pipename]
pipename = [p.replace('mean','average') for p in pipename]
pipetext = ['']*N
pipetext[pipename.index('ips, db, long, decimate, linear, zalign')] = 'Highest discriminability'
pipecolor = ['rgb(158,202,225)']*N
pipecolor[pipename.index('ips, db, long, decimate, linear, zalign')] = 'rgb(8,48,107)'
data = [plotly.graph_objs.Bar(
x = pipename,
y = list(relsort[0:N].R),
text = pipetext,
marker = dict(
color = pipecolor,
line = dict(
color = 'rgb(8,48,107)',
width = 1.5
)
),
opacity = 0.6
)]
# Format the layout
layout = {'xaxis':{'tickangle':315},
'yaxis':{'title':"Reliability (Intraclass correlation)",
'range':[0,1],'tickformat':'.3f'},
'title':'Preprocessing and Analysis Pipelines: Highest Reliability',
'showlegend':False,
'margin':{'b':225,'l':150}}
# Plot the data
fig = plotly.graph_objs.Figure(data=data,layout=layout)
plotly.offline.iplot(fig)