Validation


In [1]:
%matplotlib inline

import time
import os
import sys

import sqlite3

import pandas as pd
import numpy as np
import scipy as sp

import statsmodels.formula.api as smf

import scipy.io
import scipy.stats # Needed for sp.stats.pearsonr and sp.stats.sem below

import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import seaborn as sns

import plotly
plotly.offline.init_notebook_mode()


Discriminability

Calculate mean synchrony timeseries across the middle frequency bands for each pipeline.


In [2]:
def disco_matimport(indir,outdir,startlev,endlev):
    ''' Import Matlab synchrony variables into a Pandas table
    indir = Input directory of .mat files
    outdir = Output data directory
    startlev,endlev = Slice of frequency-band indices to average over
    Returns: sync = Mean of synchrony values across frequency bands for each pipeline '''

    function = 'disco_matimport'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function,indir)
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of files =',len(os.listdir(os.path.join('.',indir))))
    
    i = 0
    sync = pd.DataFrame({})
    for file in os.listdir(os.path.join('.',indir)):
        
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        
        syncvar = file.split('_')[1].upper() # Matlab variable name, e.g. 'CPM' or 'IPS'
        matvar = scipy.io.loadmat(os.path.join('.',indir,file),variable_names=syncvar)
        avgvar = np.mean(matvar[syncvar][startlev:endlev],0) # Mean of synchrony timeseries across frequency bands

        # Store information in columns; the column name is the file name
        # minus its leading prefix token and the '.mat' extension
        temp = pd.DataFrame({file[len(file.split('_')[0])+len('_'):-4]:avgvar})
        sync = pd.concat([sync,temp],axis=1)        
        
        # Store information in rows
        # avgvar = pd.DataFrame.transpose(pd.DataFrame(avgvar))
        # pipeline = pd.DataFrame({'pipeline':[file[:-4]]})
        # temp = pd.concat([pipeline,avgvar],axis=1)        
        # sync = pd.concat([sync,temp],ignore_index=True)

    print(' ')
        
    outfile = function+'_'+indir+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    sync.to_pickle(os.path.join('.',outdir,outfile))
    
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')        
    return sync

In [18]:
VAL = disco_matimport('validity','',5,10)


11/09/2016 15:49:17 Running disco_matimport validity
11/09/2016 15:49:17 Number of files = 432
1, 2, 3, ..., 430, 431, 432,
11/09/2016 15:55:28 Saving disco_matimport_validity.pkl
11/09/2016 15:55:37 Done

In [19]:
VAL.head()


Out[19]:
cpm_coif1_decimate_cubic_vector cpm_coif1_decimate_cubic_xalign cpm_coif1_decimate_cubic_yalign cpm_coif1_decimate_cubic_zalign cpm_coif1_decimate_linear_vector cpm_coif1_decimate_linear_xalign cpm_coif1_decimate_linear_yalign cpm_coif1_decimate_linear_zalign cpm_coif1_decimate_nearest_vector cpm_coif1_decimate_nearest_xalign ... ips_sym6_mean_cubic_yalign ips_sym6_mean_cubic_zalign ips_sym6_mean_linear_vector ips_sym6_mean_linear_xalign ips_sym6_mean_linear_yalign ips_sym6_mean_linear_zalign ips_sym6_mean_nearest_vector ips_sym6_mean_nearest_xalign ips_sym6_mean_nearest_yalign ips_sym6_mean_nearest_zalign
0 0.411086 0.449412 0.483949 0.526640 0.406081 0.451630 0.484344 0.527490 0.410913 0.450989 ... 0.346234 0.517742 0.360442 0.477407 0.362911 0.528230 0.363596 0.449737 0.355297 0.530667
1 0.406029 0.449216 0.483830 0.529568 0.401735 0.451696 0.483964 0.530395 0.407795 0.450598 ... 0.362532 0.515705 0.361382 0.478330 0.358006 0.526258 0.364685 0.450874 0.350665 0.529080
2 0.395510 0.447991 0.483617 0.532305 0.395438 0.450906 0.483510 0.533129 0.401583 0.449556 ... 0.357903 0.513587 0.362428 0.479330 0.353370 0.524315 0.365736 0.452098 0.346359 0.527423
3 0.394070 0.446268 0.483077 0.535114 0.393310 0.449722 0.482721 0.535948 0.395880 0.448236 ... 0.353117 0.511502 0.363594 0.480408 0.348911 0.522390 0.366878 0.453408 0.342222 0.525601
4 0.401674 0.443921 0.482273 0.537661 0.397049 0.448084 0.481633 0.538565 0.396605 0.446561 ... 0.347529 0.509458 0.364846 0.481153 0.344580 0.520324 0.368071 0.454589 0.338178 0.523747

5 rows × 432 columns

Create database with the averaged synchrony timeseries.


In [26]:
analysis = 'validity'
columns = list(VAL)
dframe = VAL

schema = 'DROP TABLE IF EXISTS "' + analysis + '"; CREATE TABLE "' + analysis + '" ('
for i in range(len(columns)):
    schema += '"' + columns[i] + '" FLOAT'
    if i < len(columns)-1:
        schema += ', '
schema += ');'
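# Note: to_sql with if_exists='replace' below drops and recreates the table
# from the DataFrame dtypes, so this handcrafted schema mainly documents
# the intended column types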

dbase = sqlite3.connect('disco_matimport_' + analysis + '.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql(analysis,dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()

Load pipeline synchrony database.


In [2]:
new_db = sqlite3.connect('disco_matimport_validity.db')

Load condition times database.


In [3]:
time_db = sqlite3.connect('disco_millisecond_conditions.db')

Compute correlations of the synchrony timecourse from each pipeline with the condition timecourse.


In [41]:
def disco_validity(indata,outdir,timedata,samplerate):
    ''' Correlation test of synchrony timecourse from different 
    pipelines with condition timecourse from the pilot experiment
    indata = Database of synchrony measures files
    outdir = Output data directory
    timedata = Database containing condition times in milliseconds
    samplerate = Sampling rate in ms 
    (e.g., ceil(mean of actual rate across Ss) = ceil(15.4120) = 16 ms)    
    Returns: RHO = Correlations with condition timecourse '''
    
    function = 'disco_validity'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)           
    
    # Condition times in milliseconds
    query = "SELECT msec FROM conditions;"
    timeMSEC = pd.read_sql_query(query,timedata)
    timeMSEC = timeMSEC['msec'].values
    
    # List of preprocessing pipelines (columns)
    query = "PRAGMA table_info(validity);"
    preproc = [elem[1] for elem in indata.cursor().execute(query)]
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of pipelines =',len(preproc))
    
    i = 0
    RHO = pd.DataFrame(columns=('preproc','R','P'))
    for P in preproc:
        
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        
        query = "SELECT " + P + " FROM validity;"
        meanSYNC = pd.read_sql_query(query,indata)
        meanSYNC = meanSYNC[pd.notnull(meanSYNC[P])] # Remove NaN value at last element (CPM)
        meanSYNC = meanSYNC[P].values
        
        # Timecourse defining synchrony (1) and non-synchrony (0) conditions
        condSYNC = np.zeros(len(meanSYNC))
        for t in range(1,len(timeMSEC),2): # For each synchrony condition (2 timepoints)
            first = int(np.ceil((timeMSEC[t]-timeMSEC[0])/samplerate)) # First timepoint
            last = int(np.ceil((timeMSEC[t+1]-timeMSEC[0])/samplerate)) # Last timepoint
            if last > len(condSYNC):
                last = len(condSYNC)     
            condSYNC[first:last] = np.ones(last-first)
        
        # Correlation test of condition and synchrony timecourses
        result = sp.stats.pearsonr(condSYNC,meanSYNC)
        RHO.loc[RHO.shape[0]] = [P,result[0],result[1]]    
        
    print(' ')
        
    outfile = function+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    RHO.to_pickle(os.path.join('.',outdir,outfile))
    
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')        
    return RHO
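
The boxcar construction above can be sanity-checked in isolation. A minimal sketch with toy condition times (purely illustrative; not the experiment's actual times):


In [ ]:
# Toy condition times in ms: blocks alternate non-synchrony / synchrony
toyMSEC = np.array([0,1000,2000,3000,4000])
toyrate = 16 # ms per sample
nsamp = int(np.ceil((toyMSEC[-1]-toyMSEC[0])/toyrate))

toySYNC = np.zeros(nsamp)
for t in range(1,len(toyMSEC),2): # Each synchrony block spans 2 timepoints
    first = int(np.ceil((toyMSEC[t]-toyMSEC[0])/toyrate))
    last = min(int(np.ceil((toyMSEC[t+1]-toyMSEC[0])/toyrate)),nsamp)
    toySYNC[first:last] = 1

print(toySYNC[60:66]) # [ 0.  0.  0.  1.  1.  1.] -- first block starts at sample ceil(1000/16) = 63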

In [42]:
RHO = disco_validity(new_db,'',time_db,int(np.ceil(15.4120)))


11/09/2016 20:22:28 Running disco_validity
11/09/2016 20:22:28 Number of pipelines = 432
1, 2, 3, ..., 430, 431, 432,
11/09/2016 20:39:29 Saving disco_validity.pkl
11/09/2016 20:39:29 Done

In [44]:
RHO.head()


Out[44]:
preproc R P
0 cpm_coif1_decimate_cubic_vector -0.443024 0.000000e+00
1 cpm_coif1_decimate_cubic_xalign -0.073459 1.962499e-162
2 cpm_coif1_decimate_cubic_yalign -0.052611 3.636690e-84
3 cpm_coif1_decimate_cubic_zalign 0.252733 0.000000e+00
4 cpm_coif1_decimate_linear_vector -0.444704 0.000000e+00

Save correlations in database.


In [45]:
dframe = RHO
schema = """
DROP TABLE IF EXISTS "RHO"; 
CREATE TABLE "RHO" (
    "preproc" VARCHAR,
    "R" FLOAT,
    "P" FLOAT
);
"""
dbase = sqlite3.connect('disco_validity.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("RHO",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()

Group correlations by factor levels (i.e., pipeline parameters) using Pandas.


In [4]:
# Load discriminability database
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO;"
df = pd.read_sql_query(query,db)
db.close()
df.head()


Out[4]:
preproc R P
0 cpm_coif1_decimate_cubic_vector -0.443024 0.000000e+00
1 cpm_coif1_decimate_cubic_xalign -0.073459 1.962499e-162
2 cpm_coif1_decimate_cubic_yalign -0.052611 3.636690e-84
3 cpm_coif1_decimate_cubic_zalign 0.252733 0.000000e+00
4 cpm_coif1_decimate_linear_vector -0.444704 0.000000e+00

In [5]:
# Replace vanishing moments with filter lengths
# ('1|2' etc. are regex alternations; regex=True made explicit for newer pandas)
df['preproc'] = df['preproc'].str.replace('1|2','_short',regex=True)
df['preproc'] = df['preproc'].str.replace('3|4','_medium',regex=True)
df['preproc'] = df['preproc'].str.replace('5|6','_long',regex=True)

In [6]:
df.head()


Out[6]:
preproc R P
0 cpm_coif_short_decimate_cubic_vector -0.443024 0.000000e+00
1 cpm_coif_short_decimate_cubic_xalign -0.073459 1.962499e-162
2 cpm_coif_short_decimate_cubic_yalign -0.052611 3.636690e-84
3 cpm_coif_short_decimate_cubic_zalign 0.252733 0.000000e+00
4 cpm_coif_short_decimate_linear_vector -0.444704 0.000000e+00

In [7]:
df.tail()


Out[7]:
preproc R P
427 ips_sym_long_mean_linear_zalign 0.478266 0.0
428 ips_sym_long_mean_nearest_vector 0.388934 0.0
429 ips_sym_long_mean_nearest_xalign 0.302790 0.0
430 ips_sym_long_mean_nearest_yalign -0.393082 0.0
431 ips_sym_long_mean_nearest_zalign 0.476917 0.0

In [8]:
# Retrieve correlations for each factor level

paramstr = ['cpm','ips', # Synchrony measure
            'coif','db','sym', # Wavelet family
            'short','medium','long', # Filter length
            'decimate','mean', # Downsampling
            'cubic','linear','nearest', # Interpolation
            'vector','xalign','yalign','zalign'] # Combination

paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    c = df[df['preproc'].str.contains(p)].R.values # Substring match; relies on each token appearing once per pipeline name
    paramdf.loc[paramdf.shape[0]] = [p,c]

In [9]:
paramdf


Out[9]:
parameter correlations
0 cpm [-0.443023934066, -0.0734588191007, -0.0526109...
1 ips [0.419601904344, 0.298482899634, -0.4459811573...
2 coif [-0.443023934066, -0.0734588191007, -0.0526109...
3 db [-0.401646998013, -0.0663185161795, -0.0501602...
4 sym [-0.401646998013, -0.0663185161795, -0.0501602...
5 short [-0.443023934066, -0.0734588191007, -0.0526109...
6 medium [-0.366862499275, -0.0555178888118, -0.0589029...
7 long [-0.295433564528, -0.0546943131961, -0.0723451...
8 decimate [-0.443023934066, -0.0734588191007, -0.0526109...
9 mean [-0.443069044138, -0.0734711210756, -0.0526133...
10 cubic [-0.443023934066, -0.0734588191007, -0.0526109...
11 linear [-0.44470389173, -0.0647630371939, -0.05051637...
12 nearest [-0.443457036385, -0.0661041183197, -0.0508083...
13 vector [-0.443023934066, -0.44470389173, -0.443457036...
14 xalign [-0.0734588191007, -0.0647630371939, -0.066104...
15 yalign [-0.0526109918102, -0.0505163771805, -0.050808...
16 zalign [0.252732857003, 0.267245346902, 0.26694937003...
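
For a quick numeric summary of the grouped values, the mean correlation per parameter can be printed directly (a simple arithmetic mean of r; the regression analysis later uses Fisher-z values instead):


In [ ]:
# Mean correlation for each parameter level
for row in paramdf.itertuples(index=False):
    print(row.parameter, np.mean(row.correlations))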

Group correlations by factor levels (i.e., pipeline parameters) using SQLite3.


In [2]:
# Load discriminability database
db = sqlite3.connect('disco_validity.db')
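# Note: in SQL LIKE patterns, '_' is a single-character wildcard, so it also
# matches the literal underscores in the pipeline names below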
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_coif%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_db%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_sym%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())


[('cpm_coif1_decimate_cubic_vector', -0.4430239340659711, 0.0), ('cpm_coif3_decimate_cubic_vector', -0.36686249927528225, 0.0), ('cpm_coif5_decimate_cubic_vector', -0.2954335645276906, 0.0)]
[('cpm_db2_decimate_cubic_vector', -0.4016469980134443, 0.0), ('cpm_db4_decimate_cubic_vector', -0.41140742561604376, 0.0), ('cpm_db6_decimate_cubic_vector', -0.41315003238599723, 0.0)]
[('cpm_sym2_decimate_cubic_vector', -0.4016469980134443, 0.0), ('cpm_sym4_decimate_cubic_vector', -0.41094072529979264, 0.0), ('cpm_sym6_decimate_cubic_vector', -0.3601261034255285, 0.0)]

In [3]:
# Replace vanishing moments with filter lengths
query = "UPDATE RHO SET preproc = REPLACE(preproc,'1','_short');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'2','_short');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'3','_medium');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'4','_medium');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'5','_long');"; db.cursor().execute(query)
query = "UPDATE RHO SET preproc = REPLACE(preproc,'6','_long');"; db.cursor().execute(query)
db.commit()

In [4]:
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_coif%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_db%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())
query = "SELECT * FROM RHO WHERE preproc LIKE 'cpm_sym%decimate_cubic_vector';"
out = db.cursor().execute(query); print(out.fetchall())


[('cpm_coif_short_decimate_cubic_vector', -0.4430239340659711, 0.0), ('cpm_coif_medium_decimate_cubic_vector', -0.36686249927528225, 0.0), ('cpm_coif_long_decimate_cubic_vector', -0.2954335645276906, 0.0)]
[('cpm_db_short_decimate_cubic_vector', -0.4016469980134443, 0.0), ('cpm_db_medium_decimate_cubic_vector', -0.41140742561604376, 0.0), ('cpm_db_long_decimate_cubic_vector', -0.41315003238599723, 0.0)]
[('cpm_sym_short_decimate_cubic_vector', -0.4016469980134443, 0.0), ('cpm_sym_medium_decimate_cubic_vector', -0.41094072529979264, 0.0), ('cpm_sym_long_decimate_cubic_vector', -0.3601261034255285, 0.0)]

In [5]:
# Retrieve correlations for each factor level

paramstr = ['cpm','ips', # Synchrony measure
            'coif','db','sym', # Wavelet family
            'short','medium','long', # Filter length
            'decimate','mean', # Downsampling
            'cubic','linear','nearest', # Interpolation
            'vector','xalign','yalign','zalign'] # Combination

paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    query = "SELECT R FROM RHO WHERE preproc LIKE '%" + p +"%';"  
    c = pd.read_sql_query(query,db).R.values
    paramdf.loc[paramdf.shape[0]] = [p,c]

In [6]:
paramdf


Out[6]:
parameter correlations
0 cpm [-0.443023934066, -0.0734588191007, -0.0526109...
1 ips [0.419601904344, 0.298482899634, -0.4459811573...
2 coif [-0.443023934066, -0.0734588191007, -0.0526109...
3 db [-0.401646998013, -0.0663185161795, -0.0501602...
4 sym [-0.401646998013, -0.0663185161795, -0.0501602...
5 short [-0.443023934066, -0.0734588191007, -0.0526109...
6 medium [-0.366862499275, -0.0555178888118, -0.0589029...
7 long [-0.295433564528, -0.0546943131961, -0.0723451...
8 decimate [-0.443023934066, -0.0734588191007, -0.0526109...
9 mean [-0.443069044138, -0.0734711210756, -0.0526133...
10 cubic [-0.443023934066, -0.0734588191007, -0.0526109...
11 linear [-0.44470389173, -0.0647630371939, -0.05051637...
12 nearest [-0.443457036385, -0.0661041183197, -0.0508083...
13 vector [-0.443023934066, -0.44470389173, -0.443457036...
14 xalign [-0.0734588191007, -0.0647630371939, -0.066104...
15 yalign [-0.0526109918102, -0.0505163771805, -0.050808...
16 zalign [0.252732857003, 0.267245346902, 0.26694937003...

In [7]:
# Save table and close database
paramdf.to_pickle('disco_validity_params.pkl')
db.close()

Compare correlations across the different preprocessing parameters.


In [2]:
# Load pipeline correlations
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO;"
df = pd.read_sql_query(query,db)
db.close()

In [3]:
# Split pipeline names into factor levels or parameters
params = pd.DataFrame(df['preproc'].str.split('_').tolist(),columns=['sync','wave','filt','down','interp','comb'])
df = pd.concat([df,params],axis=1)
df.head()


Out[3]:
preproc R P sync wave filt down interp comb
0 cpm_coif_short_decimate_cubic_vector -0.443024 0.000000e+00 cpm coif short decimate cubic vector
1 cpm_coif_short_decimate_cubic_xalign -0.073459 1.962499e-162 cpm coif short decimate cubic xalign
2 cpm_coif_short_decimate_cubic_yalign -0.052611 3.636690e-84 cpm coif short decimate cubic yalign
3 cpm_coif_short_decimate_cubic_zalign 0.252733 0.000000e+00 cpm coif short decimate cubic zalign
4 cpm_coif_short_decimate_linear_vector -0.444704 0.000000e+00 cpm coif short decimate linear vector

In [4]:
# Convert correlations to Fisher's z
Fz = pd.DataFrame(dict(Fz=np.arctanh(df['R']).values))
df = pd.concat([df,Fz],axis=1)
df.head()


Out[4]:
preproc R P sync wave filt down interp comb Fz
0 cpm_coif_short_decimate_cubic_vector -0.443024 0.000000e+00 cpm coif short decimate cubic vector -0.475987
1 cpm_coif_short_decimate_cubic_xalign -0.073459 1.962499e-162 cpm coif short decimate cubic xalign -0.073591
2 cpm_coif_short_decimate_cubic_yalign -0.052611 3.636690e-84 cpm coif short decimate cubic yalign -0.052660
3 cpm_coif_short_decimate_cubic_zalign 0.252733 0.000000e+00 cpm coif short decimate cubic zalign 0.258330
4 cpm_coif_short_decimate_linear_vector -0.444704 0.000000e+00 cpm coif short decimate linear vector -0.478079
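
Here np.arctanh is exactly Fisher's r-to-z transform, arctanh(r) = 0.5*log((1+r)/(1-r)); a quick check:


In [ ]:
r = df['R'].values
print(np.allclose(np.arctanh(r),0.5*np.log((1+r)/(1-r)))) # True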

In [5]:
# Multiple regression of transformed correlations on categorical preprocessing parameters
est = smf.ols(formula='Fz~C(sync)+C(wave)+C(filt)+C(down)+C(interp)+C(comb)',data=df).fit()
est.summary().tables[1]


Out[5]:
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.1414 0.036 -3.899 0.000 -0.213 -0.070
C(sync)[T.ips] 0.2743 0.021 13.101 0.000 0.233 0.315
C(wave)[T.db] -8.578e-05 0.026 -0.003 0.997 -0.050 0.050
C(wave)[T.sym] 0.0019 0.026 0.075 0.940 -0.048 0.052
C(filt)[T.medium] -0.0001 0.026 -0.005 0.996 -0.051 0.050
C(filt)[T.short] -0.0076 0.026 -0.295 0.768 -0.058 0.043
C(down)[T.mean] -2.747e-05 0.021 -0.001 0.999 -0.041 0.041
C(interp)[T.linear] 0.0040 0.026 0.158 0.875 -0.046 0.054
C(interp)[T.nearest] 0.0040 0.026 0.155 0.877 -0.046 0.054
C(comb)[T.xalign] 0.1258 0.030 4.249 0.000 0.068 0.184
C(comb)[T.yalign] -0.2404 0.030 -8.120 0.000 -0.299 -0.182
C(comb)[T.zalign] 0.4011 0.030 13.550 0.000 0.343 0.459

In [6]:
# Interaction between significant factors: synchrony measure and axes combination
est = smf.ols(formula='Fz~C(sync)*C(comb)',data=df).fit()
est.summary().tables[1]


Out[6]:
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.4126 0.004 -96.858 0.000 -0.421 -0.404
C(sync)[T.ips] 0.8181 0.006 135.803 0.000 0.806 0.830
C(comb)[T.xalign] 0.3535 0.006 58.681 0.000 0.342 0.365
C(comb)[T.yalign] 0.3545 0.006 58.851 0.000 0.343 0.366
C(comb)[T.zalign] 0.6662 0.006 110.583 0.000 0.654 0.678
C(sync)[T.ips]:C(comb)[T.xalign] -0.4554 0.009 -53.455 0.000 -0.472 -0.439
C(sync)[T.ips]:C(comb)[T.yalign] -1.1899 0.009 -139.663 0.000 -1.207 -1.173
C(sync)[T.ips]:C(comb)[T.zalign] -0.5301 0.009 -62.219 0.000 -0.547 -0.513
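
As a sanity check on the treatment coding, any cell mean can be reconstructed from the coefficients above; e.g., the predicted Fisher z for the IPS/yalign cell is the intercept plus the three relevant terms (coefficients copied from the table):


In [ ]:
# Predicted cell mean for IPS/yalign from the interaction model
fz = -0.4126 + 0.8181 + 0.3545 + (-1.1899)
print(fz, np.tanh(fz)) # about -0.43 in Fisher z, about -0.41 as r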

In [10]:
# Bar chart of interaction

combolist = ['vector','xalign','yalign','zalign']
comboaxis = np.arange(len(combolist))+1
cpm = [np.tanh(np.mean(df[(df.sync=='cpm')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
ips = [np.tanh(np.mean(df[(df.sync=='ips')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
cpm_e = [np.tanh(sp.stats.sem(df[(df.sync=='cpm')&(df.comb==combo)]['Fz'].values)) for combo in combolist]
ips_e = [np.tanh(sp.stats.sem(df[(df.sync=='ips')&(df.comb==combo)]['Fz'].values)) for combo in combolist]

with sns.axes_style("whitegrid"):
    
    plt.figure(); plt.clf()
    
    ax1 = plt.subplot(121); ax1.bar(comboaxis,cpm,yerr=cpm_e)
    ax2 = plt.subplot(122); ax2.bar(comboaxis,ips,yerr=ips_e)
    
    val="Discriminability (Pearson's r)"; ax1.set_ylabel(val); # ax2.set_ylabel(val)
    val="Axes Combination"; ax1.set_xlabel(val); ax2.set_xlabel(val)
    val="Synchrony Measure: CPM"; ax1.set_title(val)
    val="Synchrony Measure: IPS"; ax2.set_title(val)
    val=comboaxis+.5; ax1.set_xticks(val); ax2.set_xticks(val)
    val=('vector','xalign','yalign','zalign'); ax1.set_xticklabels(val); ax2.set_xticklabels(val)
    val=[.75,5,-.6,.6]; ax1.axis(val); ax2.axis(val)


Box and whisker plots of parameter correlations.


In [ ]:
paramname = {'cpm':'Synchrony: CPM','ips':'Synchrony: IPS',
             'coif':'Wavelet: Coiflet','db':'Wavelet: Daubechies','sym':'Wavelet: Symlet',
             'short':'Filter Length: Short','medium':'Filter Length: Medium','long':'Filter Length: Long',
             'decimate':'Downsampling: Decimate','mean':'Downsampling: Average',
             'cubic':'Interpolation: Cubic','linear':'Interpolation: Linear','nearest':'Interpolation: Nearest',
             'vector':'Combination: Standard','xalign':'Combination: X-align',
             'yalign':'Combination: Y-align','zalign':'Combination: Z-align'}

# Number of boxes (parameters)
N = paramdf.shape[0] 

# Generate an array of rainbow colors
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0,360,num=N)] 

# Each box is a dictionary containing the data, type, and color; Shows the mean for each box
data = [{'name':paramname[paramdf.loc[i][0]],
         'y':paramdf.loc[i][1],
         'type':'box',
         'marker':{'color':c[i]},
         'boxmean':True} 
        for i in range(N)]

# Format the layout
layout = {'xaxis':{'showticklabels':False},
          'yaxis':{'title':"Discriminability (Pearson's r)",
                   'range':[-0.6,0.6],'tickformat':'.3f',
                   'zeroline':False},
          'title':'Preprocessing and Analysis Pipelines: Parameter Discriminability'}

# Plot the data
fig = {'data':data,'layout':layout}
plotly.offline.iplot(fig)

Sort pipeline correlations from highest to lowest.


In [17]:
db = sqlite3.connect('disco_validity.db')
query = "SELECT * FROM RHO ORDER BY R DESC;"
df = pd.read_sql_query(query,db)
db.close()
df[0:25]


Out[17]:
preproc R P
0 ips_db_long_decimate_linear_zalign 0.538547 0.0
1 ips_db_long_mean_linear_zalign 0.538441 0.0
2 ips_db_long_decimate_nearest_zalign 0.537953 0.0
3 ips_db_long_mean_nearest_zalign 0.537908 0.0
4 ips_coif_short_decimate_linear_zalign 0.533696 0.0
5 ips_coif_short_mean_linear_zalign 0.533524 0.0
6 ips_coif_short_decimate_nearest_zalign 0.533006 0.0
7 ips_coif_short_mean_nearest_zalign 0.532938 0.0
8 ips_db_long_mean_cubic_zalign 0.532047 0.0
9 ips_db_long_decimate_cubic_zalign 0.532046 0.0
10 ips_coif_short_decimate_cubic_zalign 0.528236 0.0
11 ips_db_medium_decimate_linear_zalign 0.528211 0.0
12 ips_coif_short_mean_cubic_zalign 0.528182 0.0
13 ips_db_medium_mean_linear_zalign 0.528130 0.0
14 ips_db_medium_decimate_nearest_zalign 0.527213 0.0
15 ips_db_medium_mean_nearest_zalign 0.527109 0.0
16 ips_db_medium_mean_cubic_zalign 0.520981 0.0
17 ips_db_medium_decimate_cubic_zalign 0.520934 0.0
18 ips_sym_medium_decimate_linear_zalign 0.506138 0.0
19 ips_sym_medium_mean_linear_zalign 0.506107 0.0
20 ips_sym_medium_mean_nearest_zalign 0.505032 0.0
21 ips_sym_medium_decimate_nearest_zalign 0.504978 0.0
22 ips_sym_medium_mean_cubic_zalign 0.502726 0.0
23 ips_sym_medium_decimate_cubic_zalign 0.502665 0.0
24 ips_db_short_mean_linear_zalign 0.501880 0.0

Bar plots of highest pipeline correlations.


In [ ]:
N = 25 # Top 25 pipelines

pipename = list(df[0:N].preproc)
pipename = [p.replace('_',', ') for p in pipename]
pipename = [p.replace('mean','average') for p in pipename]

pipetext = ['']*N
# Annotate the pipeline that also shows the highest reliability (see the Reliability section below)
pipetext[pipename.index('ips, coif, short, average, linear, zalign')] = 'Highest reliability'

pipecolor = ['rgb(158,202,225)']*N
pipecolor[pipename.index('ips, coif, short, average, linear, zalign')] = 'rgb(8,48,107)'

data = [plotly.graph_objs.Bar(
        x = pipename,
        y = list(df[0:N].R),
        text = pipetext,
        marker = dict(
            color = pipecolor,
            line = dict(
                color = 'rgb(8,48,107)',
                width = 1.5
                )
            ),
        opacity = 0.6
        )]

# Format the layout
layout = {'xaxis':{'tickangle':315},
          'yaxis':{'title':"Discriminability (Pearson's r)",
                   'range':[0,1],'tickformat':'.3f'},
          'title':'Preprocessing and Analysis Pipelines: Highest Discriminability',
          'showlegend':False,
          'margin':{'b':225,'l':150}}

# Plot the data
fig = plotly.graph_objs.Figure(data=data,layout=layout)
plotly.offline.iplot(fig)

Reliability

Calculate mean synchrony timeseries across the middle frequency bands, for each pipeline, for each pair of devices.


In [3]:
REL = disco_matimport('reliability','',5,10)


11/09/2016 21:40:15 Running disco_matimport reliability
11/09/2016 21:40:15 Number of files = 862
1, 2, 3, ..., 860, 861, 862,
11/09/2016 22:02:19 Saving disco_matimport_reliability.pkl
11/09/2016 22:02:37 Done

Create database with the averaged synchrony timeseries for each pair of devices.


In [4]:
analysis = 'reliability'
columns = list(REL)
dframe = REL

schema = 'DROP TABLE IF EXISTS "' + analysis + '"; CREATE TABLE "' + analysis + '" ('
for i in range(len(columns)):
    schema += '"' + columns[i] + '" FLOAT'
    if i < len(columns)-1:
        schema += ', '
schema += ');'

dbase = sqlite3.connect('disco_matimport_' + analysis + '.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql(analysis,dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()

In [5]:
REL.head()


Out[5]:
cpm_groupA_coif1_decimate_cubic_vector cpm_groupA_coif1_decimate_cubic_xalign cpm_groupA_coif1_decimate_cubic_yalign cpm_groupA_coif1_decimate_cubic_zalign cpm_groupA_coif1_decimate_linear_vector cpm_groupA_coif1_decimate_linear_xalign cpm_groupA_coif1_decimate_linear_yalign cpm_groupA_coif1_decimate_linear_zalign cpm_groupA_coif1_decimate_nearest_vector cpm_groupA_coif1_decimate_nearest_xalign ... ips_groupB_sym6_mean_cubic_yalign ips_groupB_sym6_mean_cubic_zalign ips_groupB_sym6_mean_linear_vector ips_groupB_sym6_mean_linear_xalign ips_groupB_sym6_mean_linear_yalign ips_groupB_sym6_mean_linear_zalign ips_groupB_sym6_mean_nearest_vector ips_groupB_sym6_mean_nearest_xalign ips_groupB_sym6_mean_nearest_yalign ips_groupB_sym6_mean_nearest_zalign
0 0.418215 0.472479 0.419658 0.611465 0.414580 0.474915 0.398093 0.613531 0.414284 0.476677 ... 0.386682 0.561145 0.442758 0.523072 0.407694 0.572926 0.444657 0.481366 0.401957 0.574316
1 0.414860 0.472894 0.422960 0.610134 0.412113 0.475309 0.400779 0.611845 0.412794 0.477215 ... 0.429352 0.559734 0.443273 0.523784 0.402313 0.571635 0.445445 0.482433 0.396942 0.573601
2 0.413279 0.472751 0.425838 0.608792 0.411136 0.475225 0.403097 0.609999 0.412433 0.477244 ... 0.424363 0.558158 0.443884 0.524597 0.397315 0.570337 0.446124 0.483616 0.392379 0.572728
3 0.411246 0.472116 0.428376 0.607484 0.409613 0.474663 0.405214 0.608146 0.411439 0.476744 ... 0.419097 0.556589 0.444621 0.525509 0.392586 0.569016 0.446895 0.484911 0.388077 0.571552
4 0.408903 0.471527 0.429978 0.606505 0.407853 0.474100 0.406376 0.606601 0.410308 0.476248 ... 0.412547 0.555034 0.445462 0.525904 0.388058 0.567430 0.447740 0.485997 0.383935 0.570273

5 rows × 862 columns

Load pipeline synchrony database.


In [3]:
new_db = sqlite3.connect('disco_matimport_reliability.db')

Compute intraclass correlations of synchrony timecourses from the paired devices.


In [4]:
def disco_reliability(indata,outdir):
    ''' Reliability test of synchrony timecourse from different pipelines
    indata = Database of synchrony measures files
    outdir = Output data directory
    Returns: REL = Intraclass correlations between two devices '''
    
    function = 'disco_reliability'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Running',function)
    
    # List of preprocessing pipelines (columns) from Group A
    query = "PRAGMA table_info(reliability);"
    preproc = [elem[1] for elem in indata.cursor().execute(query)]
    preproc = [elem for elem in preproc if 'groupA' in elem]
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Number of pipelines =',len(preproc))
    
    # Type of ICC: A-1, Case 3 [i.e., A-ICC(3,1)]
    # Absolute agreement among measurements; Two-way mixed effects single measures
    # Model 3 = Each subject is assessed by each rater, but the raters are the only raters of interest
    # Form 1 = Reliability calculated from a single measurement
    # Two-way random effects [A-1, Case 2 or A-ICC(2,1)] and A-1, Case 3 
    # have equivalent calculation and only differ in interpretation
     
    i = 0
    REL = pd.DataFrame(columns=('preproc','R')) # No p-value is computed for the ICC here (p-values come from the Matlab results loaded below)
    for P in preproc:
        
        i += 1
        sys.stdout.write(str(i)+', ')
        sys.stdout.flush()
        
        query = "SELECT " + P + " FROM reliability;" # Group A       
        A = pd.read_sql_query(query,indata)
        A = A[pd.notnull(A[P])] # Remove NaN value at last element (CPM)
        x = A[P].values
        
        query = "SELECT " + P.replace("groupA","groupB") + " FROM reliability;" # Group B
        B = pd.read_sql_query(query,indata)
        B = B[pd.notnull(B[P.replace("groupA","groupB")])] # Remove NaN value at last element (CPM)
        y = B[P.replace("groupA","groupB")].values
        
        # Reliability test of synchrony timecourses
        # http://stats.stackexchange.com/questions/63368/intra-class-correlation-and-experimental-design

        Sx = sum(x); Sy = sum(y)
        Sxx = sum(x*x); Sxy = sum((x+y)**2)/2; Syy = sum(y*y) # Sxy here feeds the between-subject SS (not sum(x*y))

        n = len(x)
        fact = ((Sx + Sy)**2)/(n*2)
        SS_tot = Sxx + Syy - fact
        SS_among = Sxy - fact
        SS_error = SS_tot - SS_among

        MS_error = SS_error/n
        MS_among = SS_among/(n-1)

        ICC = (MS_among - MS_error) / (MS_among + MS_error)
        REL.loc[REL.shape[0]] = [P,ICC]    
        
    print(' ')
        
    outfile = function+'.pkl'
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Saving',outfile)
    REL.to_pickle(os.path.join('.',outdir,outfile))
    
    print(time.strftime("%m/%d/%Y"),time.strftime("%H:%M:%S"),'Done')        
    return REL
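
The shortcut sums above can be cross-checked against an explicit two-way decomposition on toy data; a minimal sketch (using the same divisors, n-1 and n, as the function above):


In [ ]:
# Cross-check the ICC shortcut against explicit mean squares (toy data)
rng = np.random.RandomState(0)
x = rng.rand(100); y = x + 0.1*rng.randn(100) # Two "devices" measuring one series

n = len(x)
Sx = sum(x); Sy = sum(y)
Sxx = sum(x*x); Sxy = sum((x+y)**2)/2; Syy = sum(y*y)
fact = ((Sx + Sy)**2)/(n*2)
MS_among = (Sxy - fact)/(n-1); MS_error = (Sxx + Syy - Sxy)/n
icc_shortcut = (MS_among - MS_error)/(MS_among + MS_error)

pairmean = (x + y)/2; grand = (Sx + Sy)/(2*n)
ms_among = 2*sum((pairmean - grand)**2)/(n-1) # Between-subject mean square
ms_error = sum((x - pairmean)**2 + (y - pairmean)**2)/n # Within-subject mean square
icc_direct = (ms_among - ms_error)/(ms_among + ms_error)

print(np.isclose(icc_shortcut,icc_direct)) # True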

In [5]:
REL = disco_reliability(new_db,'')


11/13/2016 10:51:21 Running disco_reliability
11/13/2016 10:51:21 Number of pipelines = 431
1, 2, 3, ..., 429, 430, 431,
11/13/2016 11:34:52 Saving disco_reliability.pkl
11/13/2016 11:34:52 Done

Save correlations in database.


In [9]:
dframe = REL
schema = """
DROP TABLE IF EXISTS "REL"; 
CREATE TABLE "REL" (
    "preproc" VARCHAR,
    "R" FLOAT,
    "P" FLOAT
);
"""
dbase = sqlite3.connect('disco_reliability.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("REL",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()

Group correlations by factor levels (i.e., pipeline parameters) using Pandas.


In [8]:
# Pipeline names
valdb = sqlite3.connect('disco_validity.db')
query = "SELECT preproc FROM RHO;"
relpip = pd.read_sql_query(query,valdb)
valdb.close()

# ICCs (column 0) and p-values (column 6) computed in Matlab
relmat = scipy.io.loadmat('matlabfiles/disco_reliability.mat',variable_names=['REL','preproc'])
relcor = pd.DataFrame({'R':pd.DataFrame(relmat['REL'])[0]})
relpvl = pd.DataFrame({'P':pd.DataFrame(relmat['REL'])[6]})

# Reliability database
dframe = pd.concat([relpip,relcor,relpvl],axis=1)
schema = """
DROP TABLE IF EXISTS "ICC"; 
CREATE TABLE "ICC" (
    "preproc" VARCHAR,
    "R" FLOAT,
    "P" FLOAT
);
"""
dbase = sqlite3.connect('disco_reliability.db')
dbase.cursor().executescript(schema); dbase.commit()
dframe.to_sql("ICC",dbase,if_exists='replace',index=False); dbase.commit()
dbase.close()

In [9]:
dframe.head()


Out[9]:
preproc R P
0 cpm_coif_short_decimate_cubic_vector 0.331900 0.0
1 cpm_coif_short_decimate_cubic_xalign 0.579359 0.0
2 cpm_coif_short_decimate_cubic_yalign 0.180107 0.0
3 cpm_coif_short_decimate_cubic_zalign 0.578860 0.0
4 cpm_coif_short_decimate_linear_vector 0.332233 0.0

In [10]:
dframe.tail()


Out[10]:
preproc R P
427 ips_sym_long_mean_linear_zalign 0.725738 0.0
428 ips_sym_long_mean_nearest_vector 0.365295 0.0
429 ips_sym_long_mean_nearest_xalign 0.706074 0.0
430 ips_sym_long_mean_nearest_yalign -0.435204 1.0
431 ips_sym_long_mean_nearest_zalign 0.724398 0.0

In [11]:
# Retrieve reliabilities for each factor level

paramstr = ['cpm','ips', # Synchrony measure
            'coif','db','sym', # Wavelet family
            'short','medium','long', # Filter length
            'decimate','mean', # Downsampling
            'cubic','linear','nearest', # Interpolation
            'vector','xalign','yalign','zalign'] # Combination

paramdf = pd.DataFrame(columns=['parameter','correlations'])
for p in paramstr:
    c = dframe[dframe['preproc'].str.contains(p)].R.values
    paramdf.loc[paramdf.shape[0]] = [p,c]

In [12]:
paramdf


Out[12]:
parameter correlations
0 cpm [0.331900174013, 0.579358714027, 0.18010708272...
1 ips [0.34461322736, 0.679160557675, -0.41354417363...
2 coif [0.331900174013, 0.579358714027, 0.18010708272...
3 db [0.241233743194, 0.481703836298, 0.15899816692...
4 sym [0.241233743194, 0.481703836298, 0.15899816692...
5 short [0.331900174013, 0.579358714027, 0.18010708272...
6 medium [0.300646309148, 0.586527673733, 0.20503517260...
7 long [0.243963752708, 0.561886034352, 0.20177121670...
8 decimate [0.331900174013, 0.579358714027, 0.18010708272...
9 mean [0.331884810745, 0.579366087792, 0.18010289161...
10 cubic [0.331900174013, 0.579358714027, 0.18010708272...
11 linear [0.332232603231, 0.580365155504, 0.18460311829...
12 nearest [0.33123645765, 0.580777150017, 0.185454189469...
13 vector [0.331900174013, 0.332232603231, 0.33123645765...
14 xalign [0.579358714027, 0.580365155504, 0.58077715001...
15 yalign [0.180107082727, 0.184603118292, 0.18545418946...
16 zalign [0.578860137678, 0.579984756026, 0.57940980535...

In [13]:
# Save table
paramdf.to_pickle('disco_reliability_params.pkl')

Compare reliabilities across the different preprocessing parameters.


In [3]:
# Load pipeline reliabilities
db = sqlite3.connect('disco_reliability.db')
query = "SELECT * FROM ICC;"
df = pd.read_sql_query(query,db)
db.close()

In [4]:
# Split pipeline names into factor levels or parameters
params = pd.DataFrame(df['preproc'].str.split('_').tolist(),columns=['sync','wave','filt','down','interp','comb'])
df = pd.concat([df,params],axis=1)
df.head()


Out[4]:
preproc R P sync wave filt down interp comb
0 cpm_coif_short_decimate_cubic_vector 0.331900 0.0 cpm coif short decimate cubic vector
1 cpm_coif_short_decimate_cubic_xalign 0.579359 0.0 cpm coif short decimate cubic xalign
2 cpm_coif_short_decimate_cubic_yalign 0.180107 0.0 cpm coif short decimate cubic yalign
3 cpm_coif_short_decimate_cubic_zalign 0.578860 0.0 cpm coif short decimate cubic zalign
4 cpm_coif_short_decimate_linear_vector 0.332233 0.0 cpm coif short decimate linear vector

In [5]:
# Convert correlations to Fisher's z
Fz = pd.DataFrame(dict(Fz=np.arctanh(df['R']).values))
df = pd.concat([df,Fz],axis=1)
df.head()


Out[5]:
preproc R P sync wave filt down interp comb Fz
0 cpm_coif_short_decimate_cubic_vector 0.331900 0.0 cpm coif short decimate cubic vector 0.344962
1 cpm_coif_short_decimate_cubic_xalign 0.579359 0.0 cpm coif short decimate cubic xalign 0.661497
2 cpm_coif_short_decimate_cubic_yalign 0.180107 0.0 cpm coif short decimate cubic yalign 0.182093
3 cpm_coif_short_decimate_cubic_zalign 0.578860 0.0 cpm coif short decimate cubic zalign 0.660747
4 cpm_coif_short_decimate_linear_vector 0.332233 0.0 cpm coif short decimate linear vector 0.345336

In [6]:
# Multiple regression of transformed correlations on categorical preprocessing parameters
est = smf.ols(formula='Fz~C(sync)+C(wave)+C(filt)+C(down)+C(interp)+C(comb)',data=df).fit()
est.summary().tables[1]


Out[6]:
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 0.3505 0.030 11.512 0.000 0.291 0.410
C(sync)[T.ips] -0.0226 0.018 -1.285 0.200 -0.057 0.012
C(wave)[T.db] -0.0381 0.022 -1.769 0.078 -0.080 0.004
C(wave)[T.sym] -0.0170 0.022 -0.792 0.429 -0.059 0.025
C(filt)[T.medium] 0.0051 0.022 0.237 0.813 -0.037 0.047
C(filt)[T.short] -0.0330 0.022 -1.533 0.126 -0.075 0.009
C(down)[T.mean] 8.232e-06 0.018 0.000 1.000 -0.035 0.035
C(interp)[T.linear] 0.0027 0.022 0.125 0.901 -0.040 0.045
C(interp)[T.nearest] 0.0024 0.022 0.111 0.912 -0.040 0.045
C(comb)[T.xalign] 0.3986 0.025 16.033 0.000 0.350 0.447
C(comb)[T.yalign] -0.4283 0.025 -17.228 0.000 -0.477 -0.379
C(comb)[T.zalign] 0.4520 0.025 18.180 0.000 0.403 0.501

Box and whisker plots of parameter reliabilities.


In [ ]:
paramname = {'cpm':'Synchrony: CPM','ips':'Synchrony: IPS',
             'coif':'Wavelet: Coiflet','db':'Wavelet: Daubechies','sym':'Wavelet: Symlet',
             'short':'Filter Length: Short','medium':'Filter Length: Medium','long':'Filter Length: Long',
             'decimate':'Downsampling: Decimate','mean':'Downsampling: Average',
             'cubic':'Interpolation: Cubic','linear':'Interpolation: Linear','nearest':'Interpolation: Nearest',
             'vector':'Combination: Standard','xalign':'Combination: X-align',
             'yalign':'Combination: Y-align','zalign':'Combination: Z-align'}

# Number of boxes (parameters)
N = paramdf.shape[0] 

# Generate an array of rainbow colors
c = ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0,360,num=N)] 

# Each box is a dictionary containing the data, type, and color; Shows the mean for each box
data = [{'name':paramname[paramdf.loc[i][0]],
         'y':paramdf.loc[i][1],
         'type':'box',
         'marker':{'color':c[i]},
         'boxmean':True} 
        for i in range(N)]

# Format the layout
layout = {'xaxis':{'showticklabels':False},
          'yaxis':{'title':"Reliability (Intraclass correlation)",
                   'range':[-0.6,0.8],'tickformat':'.3f',
                   'zeroline':False},
          'title':'Preprocessing and Analysis Pipelines: Parameter Reliability'}

# Plot the data
fig = {'data':data,'layout':layout}
plotly.offline.iplot(fig)

Sort pipeline reliabilities from highest to lowest.


In [26]:
relsort = dframe.sort_values('R',ascending=False)
relsort[0:25]


Out[26]:
preproc R P
235 ips_coif_short_mean_linear_zalign 0.752083 0.0
223 ips_coif_short_decimate_linear_zalign 0.752035 0.0
239 ips_coif_short_mean_nearest_zalign 0.750913 0.0
227 ips_coif_short_decimate_nearest_zalign 0.750883 0.0
231 ips_coif_short_mean_cubic_zalign 0.749176 0.0
219 ips_coif_short_decimate_cubic_zalign 0.749164 0.0
403 ips_sym_medium_mean_linear_zalign 0.739974 0.0
391 ips_sym_medium_decimate_linear_zalign 0.739923 0.0
395 ips_sym_medium_decimate_nearest_zalign 0.739172 0.0
407 ips_sym_medium_mean_nearest_zalign 0.739135 0.0
399 ips_sym_medium_mean_cubic_zalign 0.736799 0.0
355 ips_db_long_mean_linear_zalign 0.736787 0.0
343 ips_db_long_decimate_linear_zalign 0.736706 0.0
387 ips_sym_medium_decimate_cubic_zalign 0.736646 0.0
359 ips_db_long_mean_nearest_zalign 0.736059 0.0
347 ips_db_long_decimate_nearest_zalign 0.736026 0.0
247 ips_coif_medium_decimate_linear_zalign 0.731954 0.0
351 ips_db_long_mean_cubic_zalign 0.731879 0.0
339 ips_db_long_decimate_cubic_zalign 0.731867 0.0
259 ips_coif_medium_mean_linear_zalign 0.731774 0.0
251 ips_coif_medium_decimate_nearest_zalign 0.730686 0.0
263 ips_coif_medium_mean_nearest_zalign 0.730540 0.0
243 ips_coif_medium_decimate_cubic_zalign 0.728627 0.0
255 ips_coif_medium_mean_cubic_zalign 0.728409 0.0
427 ips_sym_long_mean_linear_zalign 0.725738 0.0

Bar plots of highest pipeline reliabilities.


In [ ]:
N = 25 # Top 25 pipelines

pipename = list(relsort[0:N].preproc)
pipename = [p.replace('_',', ') for p in pipename]
pipename = [p.replace('mean','average') for p in pipename]

pipetext = ['']*N
# Annotate the pipeline that also shows the highest discriminability (see the Discriminability section above)
pipetext[pipename.index('ips, db, long, decimate, linear, zalign')] = 'Highest discriminability'

pipecolor = ['rgb(158,202,225)']*N
pipecolor[pipename.index('ips, db, long, decimate, linear, zalign')] = 'rgb(8,48,107)'

data = [plotly.graph_objs.Bar(
        x = pipename,
        y = list(relsort[0:N].R),
        text = pipetext,
        marker = dict(
            color = pipecolor,
            line = dict(
                color = 'rgb(8,48,107)',
                width = 1.5
                )
            ),
        opacity = 0.6
        )]

# Format the layout
layout = {'xaxis':{'tickangle':315},
          'yaxis':{'title':"Reliability (Intraclass correlation)",
                   'range':[0,1],'tickformat':'.3f'},
          'title':'Preprocessing and Analysis Pipelines: Highest Reliability',
          'showlegend':False,
          'margin':{'b':225,'l':150}}

# Plot the data
fig = plotly.graph_objs.Figure(data=data,layout=layout)
plotly.offline.iplot(fig)