In [4]:
from __future__ import print_function

import numpy as np
import pandas as pd
from collections import OrderedDict #sorting participant df dict before pd.concat()
import matplotlib.pylab as plt
%matplotlib inline
pd.options.display.mpl_style = 'default'

import cPickle as pickle

non_english_fluent  = ['023', '031', '045', '050', '070', '106',]
left_handed = ['042', '088',]
pro_inst_skill = ['026', '037']

excluded_all_tasks = non_english_fluent + left_handed + pro_inst_skill

In [5]:
def col_matches(df, regex):
    'returns a list of columns in a df that match a regex string.'
    import re
    cols = list(enumerate(df.columns))
    matches = [c for (i, c) in cols 
               if re.findall(regex, c)]
    return matches


def compare_transformations(df, columns, functions, **kwargs):
    
    print('raw')
    df[columns].hist(**kwargs)
    plt.show()
    
    for name, func in functions.items():
       
        print(name)
        
        df[columns].apply(func).hist(**kwargs)
        plt.show()
        
        
def quickcompare(r, df, size=(15,7)): 
    inverse = lambda x: 1.0/x
    return compare_transformations(df, col_matches(df, r),
                                   {'inverse': inverse,
                                    'log1p': np.log1p,
                                    'sqrt':  np.sqrt,  
                                    'log': np.log,
                                    },
                                    figsize=size)

In [10]:
# using this for inline documentation so that it's clear
# that the printing statement isn't part of the necessary
# transformation code.
def html_print(df):
    try:
        out = df.to_html()
    except AttributeError:
        out = pd.DataFrame(df).to_html()
    return HTML(out)


def htmljoin(df_list, delimiter=''):    
    return HTML(delimiter.join([x.to_html() for x in df_list]))


def col_matches(df, regex):     
    import re    
    cols = list(enumerate(df.columns))        
    matches = [c for (i, c) in cols 
               if re.findall(regex, c)]        
    return matches


def concat_matches(df, *args):    
    assert all([len(r) for r in args])
    import re        
    col_match_lists = [col_matches(df, regex) for regex in args]    
    col_set = [df[matches] for matches in col_match_lists]    
    
    if len(col_set) == 0:
        return None    
    elif len(col_set) == 1:
        return col_set[0]    
    else:
        return pd.concat(col_set, axis=1)

    
def show_frames(frame_list, delimiter=''):
    
    from IPython.display import HTML
    
    if len(frame_list) == len(delimiter):        
        html_out = ""
        item_template = '<p><strong>{}</strong></p>{}<br>'
        
        for i, tup in enumerate(zip(frame_list, delimiter)):
            frame = tup[0]
            tag = tup[1]
            html_out += item_template.format(tag, frame.to_html())            
        return HTML(html_out)
        
    else:        
        html_out = [df.to_html() for df in frame_list]        
        return HTML(delimiter.join(html_out))

In [11]:
pfilenames = "c:/db_pickles/pickle - dfo-{measure} - {updated}.pickle"

updated_scales = '2014-10-29a'
updated_isip = '2014-10-12b'
updated_sms = '2014-10-20a'

sms_df = pd.read_pickle(pfilenames.format(measure = 'sms', updated = updated_sms))

scales_df = pd.read_pickle(pfilenames.format(measure = 'scales',
                                             updated = updated_scales))

isip_measures = ['isip5_lag2',
                 'isip5_avgprev4',
                 'isip8_lag2',
                 'isip8_avgprev4',
                 ]

isip_frames = {m: pd.read_pickle(pfilenames.format(measure = m,
                                                   updated = updated_isip))
               for m in isip_measures}

sms_frames = {'sms': sms_df}
scales_frames = {'scales': scales_df}

In [12]:
from itertools import chain

task_output = OrderedDict(chain(scales_frames.items(),
                                sms_frames.items(),
                                isip_frames.items(),
                                ))

In [17]:
dfo = pd.concat(task_output.values(), 
                axis=1, #defaults
                join='outer', 
                keys=task_output.keys(),
                names=['set'], 
                )

to_drop = set(dfo.index).intersection(excluded_all_tasks)

if to_drop:
    print("Dropping: {}".format(list(to_drop)))
    dfo = dfo.drop(to_drop)
    
assert len(set(dfo.index).intersection(excluded_all_tasks))==0


Dropping: ['026', '037']

In [18]:
full_updated = '2014-10-29a'

pfilenames = "c:/db_pickles/pickle - dfo-{measure} - {updated}.pickle"
output_file = pfilenames.format(measure='full', updated=full_updated)
pickle.dump(dfo, open(output_file, "wb"))

In [22]:
# To use this outside of pandas (R) we'll want to make a flat table
# where the outer index is converted to a prefix for the variable name.

top_level_label = {'scales':         'SCAL',
                   'sms':            'SMSR',
                   'isip8_lag2': 'I8L2',
                   'isip5_lag2': 'I5L2',
                   'isip8_avgprev4': 'I8P4',
                   'isip5_avgprev4': 'I5P4',
                    }

dfo_flat_subset = dfo.copy().xs(top_level_label.keys(), axis=1)

dfo_flat_subset.columns = ['_'.join([top_level_label[outer], inner]) 
                           for (outer, inner) in dfo_flat_subset.columns]

#shorten variable name for export
dfo_flat_subset.columns = [c.replace('avgprev4sq_', "") for c in dfo_flat_subset.columns]

In [23]:
full_updated = '2014-10-29a'
pfilenames = "c:/db_pickles/pickle - dfo-{measure} - {updated}.{ext}"
output_file_csv =    pfilenames.format(measure='flat', updated=full_updated, ext="csv")
output_file_pickle = pfilenames.format(measure='flat', updated=full_updated, ext="pickle")

pickle.dump(dfo_flat_subset, open(output_file_pickle, "wb"))

# Re-code NaNs for external analyses
dfo_coded_nan = dfo_flat_subset.replace(np.nan, 77777)
dfo_coded_nan.to_csv(output_file_csv)

In [24]:
dfo_flat_subset.T[::15]


Out[24]:
015 016 017 018 019 020 021 022 024 025 ... 112 113 114 115 116 117 118 119 120 121
SCAL_session_day 20140228 20140303 20140303 20140303 20140303 20140303 20140304 20140304 20140304 20140304 ... 20140508 20140508 20140508 20140508 20140509 20140509 20140509 20140509 20140509 20140509
SCAL_calc_bfi_openness 2.8 3.7 3.7 3.6 3.3 2.5 2.7 4.7 4.2 3.2 ... 4.3 3.6 3.6 4.3 4.4 3 3.9 4.3 4.9 4.2
SCAL_notes_bfi ...
SCAL_wasivocab_itemscore02_shovel 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
SCAL_wasivocab_itemscore17_decade 2 2 2 2 2 2 2 2 1 2 ... 1 2 2 2 2 1 2 2 2 2
SCAL_wasimatrix_itemscore01 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
SCAL_wasimatrix_itemscore16 0 1 1 1 1 0 1 1 1 1 ... 1 1 1 1 1 0 1 1 1 1
SCAL_qbasic_isfemale 1 0 0 0 0 0 1 1 1 0 ... 1 0 1 1 0 1 0 1 0 1
SCAL_qbasic_relationshipyears 2 0 0 0 0 0 1 0 0 0 ... 1 2 0 3 0 1 0 1 0 0
SCAL_qbasic_exerciseyn 0 0 1 1 0 0 0 0 0 1 ... 0 0 0 0 1 0 0 0 0 0
SCAL_qmusic_gamehoursall 5 0 0 0 10 5 0 0 10 28 ... 3 2 8 0 1 8 0 10 0 2
SCAL_bfi_item02 2 3 4 2 2 1 3 4 1 3 ... 1 3 2 3 4 2 5 3 3 3
SCAL_bfi_item17 1 5 2 4 4 5 4 3 5 3 ... 5 5 5 4 4 4 5 5 4 3
SCAL_bfi_item32 3 5 3 5 5 4 4 4 5 2 ... 5 5 5 2 4 5 5 5 3 4
SCAL_qmusic_dancehours_nonzero 1 1 0 1 1 0 0 0 1 1 ... 1 0 1 1 1 1 1 1 0 0
SCAL_orders_phase 2 2 2 0 2 0 2 0 2 1 ... 1 0 0 1 2 0 0 0 2 0
SCAL_order_lin8j 9 12 10 14 10 11 11 11 12 13 ... 13 13 13 13 10 12 14 12 11 12
SMSR_iso8t1_DPm -3.198754 -2.827224 -13.31115 -0.5532028 -2.3272 -3.092538 -1.458121 0.9686097 -2.741571 0.3546706 ... -5.737524 0.2056869 -0.8263522 -2.226755 -1.372491 -1.247007 1.162443 -0.06084488 -9.056157 -3.304496
SMSR_iso5t2_ints_mean 497.7625 500.7283 499.561 500.0639 500.2271 500.4319 499.6852 499.952 499.9137 502.1116 ... 499.2338 500.8816 500.1932 499.4348 500.5344 499.8039 500.2396 499.958 500.086 499.497
SMSR_lin5t_dev_mean -56.08282 -29.30826 -54.49151 -32.42488 -17.69265 -50.66523 -50.73567 -19.21868 -15.79687 -5.572805 ... -90.36823 -7.757573 -73.42938 -27.87748 -21.62065 -38.02459 -12.0379 -29.42216 -41.91985 -51.97161
SMSR_phase5t_DPm -8.176227 -2.727189 -4.037511 -3.222491 -0.05654191 -3.009335 -4.691215 -0.8777716 -1.028469 2.607912 ... -7.511362 2.75177 -4.86865 -0.9072752 -1.493659 -7.582498 3.159855 2.719712 -6.585747 -2.178178
SMSR_phase8t_ints_mean 854.141 799.6398 799.1124 800.6056 800.2841 799.3914 800.748 799.8787 799.9338 805.9211 ... 800.5021 801.6799 800.7783 800.6417 800.7478 801.5609 800.0165 801.3236 799.9278 808.1808
SMSR_iso8j_dev_mean -88.09475 -49.75272 -80.51963 -39.90446 -77.28071 -63.80611 -71.43951 13.19332 -55.40579 -17.78908 ... -96.03697 -17.07073 -95.44311 -47.41177 -47.15015 -37.05709 0.4706038 -41.18913 -59.59362 -85.35649
SMSR_lin8j_DPm -2.533303 -2.792467 2.636454 -0.01542999 4.258314 -5.878196 2.467047 0.5887284 -2.485666 0.5191878 ... -9.6238 3.214722 -10.07283 -1.967341 -0.026768 -9.894439 0.8668867 1.388157 -2.93977 -2.640397
SMSR_phase5j_ints_mean 506.8288 499.9173 502.1424 499.1505 500.969 499.8378 499.8912 499.5976 500.2107 499.647 ... 500.6447 499.4388 500.236 500.3357 500.1026 502.6973 500.2303 500.3979 500.0319 499.8732
SMSR_phase8j_mpk_dev_mean -79.62756 -36.40628 -81.11475 -2.749477 -13.86751 -91.65142 -78.06689 -7.354462 -42.08086 -4.122462 ... -95.79649 -33.15391 -99.04375 -64.691 -65.41613 -17.4071 -28.808 -28.49723 -78.43963 -94.47895
SMSR_phase8tp_mpk_DPm -7.140225 -3.235273 -11.71376 4.041789 -0.5309412 -2.577912 -2.118466 -0.9703458 -2.596259 1.407291 ... -9.201369 0.6187495 -5.925623 -2.968792 -2.300928 0.5214794 0.9848873 -0.6199092 -4.715839 -2.282429
SMSR_phase8t_mpr_ints_mean 860.2441 798.0705 794.8977 792.7024 802.1488 803.5191 800.7006 792.727 796.321 811.0014 ... 800.3062 802.3016 799.1497 797.8899 798.3463 802.1358 800.9553 801.6578 795.8792 805.1593
SMSR_phase5j_mpr_dev_mean -50.26178 -14.76985 -8.999873 -32.08286 -28.65655 -42.44111 -27.44129 -48.19151 -10.88 -24.22806 ... -57.69003 19.43637 -51.56131 -16.02781 -24.93797 -17.4806 -29.83729 -19.30714 -46.16006 -8.326523
SMSR_phase5t_mpr_DPm -12.23243 -3.513054 -5.154859 -2.50175 0.2565294 -5.129334 -5.860247 -1.669478 -3.13251 1.938041 ... -5.24474 1.745637 -3.206258 -2.324636 -1.114924 -8.193405 8.807288 2.862179 -6.574524 -5.232282
SMSR_phase8j_psk_ints_mean 784.9812 799.4197 805.6947 802.441 544.9198 713.3414 805.3557 810.4893 799.2237 586.9541 ... 583.3398 803.537 671.0698 805.7653 784.7367 678.0739 791.1003 797.979 809.8003 748.8567
SMSR_phase8tp_psk_dev_mean -50.91143 -39.09818 -50.33416 58.50181 -6.125333 -44.83719 -0.1602748 -5.275 -20.88967 -5.535 ... -43.32022 -15.734 -55.33533 -2.387 -19.805 -43.359 30.537 -9.380667 -35.2106 -22.77448
SMSR_phase5j_psk_DPm -9.344384 -1.153378 -3.226146 -1.629408 -6.722491 -10.10705 -4.234703 -6.802409 -0.8209128 -4.411202 ... -11.40426 1.375 -10.93867 0.682929 -4.114753 -4.736637 -3.420041 -1.031487 -8.463048 -2.845095
SMSR_phase5j_psr_ints_mean 424.6862 496.433 370.9547 495.8177 451.8123 347.7508 500.6287 456.572 459.4063 462.9783 ... 404.6119 382.4583 421.5708 461.0163 492.9083 505.074 505.492 507.864 502.129 501.7723
SMSR_phase5t_psr_dev_mean -10.77273 -23.94733 -16.24867 22.54767 10.64733 -8.098464 -17.88367 2.382921 -5.860746 7.635087 ... -36.13908 7.519333 -17.80067 -15.92233 0.6186667 -24.97372 37.26721 12.706 -22.69108 5.455534
SMSR_phase8t_nrm_DPm -5.078323 -3.202527 -11.17975 0.8494247 -0.6688233 -2.777164 -2.903466 -0.2268467 -2.632887 0.8131526 ... -5.441476 1.11521 -7.397457 -3.748128 -1.774755 -1.543361 2.506108 0.04823645 -5.629138 -3.905582
SMSR_phase5j_nrm_ints_mean 501.0488 500.4317 497.811 497.976 499.3087 501.4147 500.0498 498.73 499.6319 499.6477 ... 501.6538 500.1294 499.322 498.9119 500.7787 499.4885 500.1757 498.786 498.9888 499.419
SMSR_lint_610690_dev_mean -43.56383 -23.69982 -34.07529 -13.36782 -6.537333 -41.06354 -27.31262 -7.483244 -16.6308 9.663867 ... -95.22391 2.222044 -41.00091 -16.31708 -13.41098 -23.47564 7.974311 -17.76238 -25.82191 -17.1446
SMSR_lint_700800_DPm -3.457716 -2.836999 -5.324347 -2.588808 -0.5348895 -3.920463 -2.249709 -1.423945 -3.538202 2.053367 ... -6.206299 -0.2037628 -2.421917 -3.037278 -3.049772 -3.185119 0.8063814 -1.259571 -4.405488 -6.258277
SMSR_lint_500600_ints_mean 589.4669 556.8507 559.6688 556.4262 556.9577 557.5166 557.2973 557.415 555.6666 563.1187 ... 565.8739 563.6617 557.8447 558.8867 559.3468 562.4424 560.3478 558.3712 556.7481 559.5265
SMSR_linj_500600_dev_mean -53.02957 -24.77545 -38.33945 NaN -42.62447 -52.70923 -26.50805 -32.03333 -20.17362 -20.1902 ... -96.46904 -5.09497 -85.95285 -29.80226 -12.3015 -68.68218 -14.45013 -9.628119 -23.82299 -18.08722
I5P4_lagdev_local 2.918599 2.692996 2.764646 2.080451 1.397983 3.357435 3.454273 2.634599 2.834259 2.568266 ... 2.629085 1.89882 4.873864 2.944964 2.272517 2.884701 3.168178 1.649012 2.400801 2.143469
I8L2_ints_variance 2173.496 887.7761 1740.214 1034.252 676.3646 2114.371 1388.896 511.8083 903.3189 2234.78 ... 1722.752 830.9859 1941.435 492.8784 431.2057 1463.46 1311.735 550.8182 666.7421 1675.055
I5L2_lag2devsq_count 114 105 111 115 114 106 115 116 113 104 ... 111 113 113 109 111 106 111 106 118 113

44 rows × 97 columns

Below are data explorations after the exporting work has finished...


In [26]:
#quickcompare('DPsd$', df=dfo_flat_subset)
# demonstrates that the inverse transform works really well here (or did before I worked on earlier stages...)

quickcompare('nrm_DPsd', df=dfo_flat_subset)  #looks good
quickcompare('psk_DPsd', df=dfo_flat_subset)


raw
inverse
log1p
log
sqrt
raw
inverse
log1p
log
sqrt

In [30]:
scales = dfo.xs('scales', axis=1)
#scales

In [32]:
females = scales[scales.session_isfemale==True]
males = scales[scales.session_isfemale==False]
len(males)

allstats = scales.describe()[1:3]
malestats = males.describe()[1:3]
femalestats = females.describe()[1:3]

allstats.T


Out[32]:
mean std
session_day 20140411.371134 74.619032
session_isfemale 0.608247 0.490678
sex_femalezero 0.391753 0.490678
participant_age 20.979381 5.055895
calc_wasivocab_totalrawscore 38.412371 5.448151
calc_wasivocab_tscore 52.360825 9.084311
calc_bfi_extraversion 3.518041 0.877227
calc_bfi_agreeableness 3.914089 0.626388
calc_bfi_conscientiousness 3.663230 0.640588
calc_bfi_neuroticism 2.854566 0.735095
calc_bfi_openness 3.791753 0.604440
calc_qmusic_socialimportance 4.295533 1.464769
order_500ms_first 0.556701 0.499355
order_rhythmfirst 0.515464 0.502357
wasivocab_itemscore01_fish 1.000000 0.000000
wasivocab_itemscore02_shovel 1.000000 0.000000
wasivocab_itemscore03_shell 1.000000 0.000000
wasivocab_itemscore04_shirt 2.000000 0.000000
wasivocab_itemscore05_car 1.958763 0.285684
wasivocab_itemscore06_lamp 1.989691 0.101535
wasivocab_itemscore07_bird 1.927835 0.297469
wasivocab_itemscore08_tongue 1.783505 0.413995
wasivocab_itemscore09_pet 1.742268 0.462744
wasivocab_itemscore10_lunch 1.876289 0.414772
wasivocab_itemscore11_bell 1.731959 0.489801
wasivocab_itemscore12_calendar 1.793814 0.431522
wasivocab_itemscore13_alligator 1.896907 0.338025
wasivocab_itemscore14_dance 1.711340 0.499140
wasivocab_itemscore15_summer 1.567010 0.720347
wasivocab_itemscore16_reveal 1.876289 0.388848
... ... ...
qmusic_gamehoursall_nonzero 0.484536 0.502357
qmusic_gamehoursdrumsticks_nonzero 0.195876 0.398935
qmusic_behaviors_08_otherprs_ln1p 1.846726 0.954699
qmusic_behaviors_09_danceprv_ln1p 0.938295 0.989376
qmusic_dancelevel_ln1p 0.625159 0.645580
qmusic_dancelevel_tophalf 0.402062 0.492861
orders_500 0.443299 0.499355
orders_800 0.556701 0.499355
orders_iso 0.989691 0.810026
orders_phase 0.969072 0.883337
orders_linear 1.041237 0.762637
order_iso5t1 1.443299 0.499355
order_iso8t1 1.556701 0.499355
order_iso5t2 5.422680 1.676026
order_iso8t2 5.536082 1.714290
order_psh5t 5.381443 1.862141
order_psh8t 5.494845 1.809257
order_lin5t 5.525773 1.594867
order_lin8t 5.639175 1.614940
order_iso5j 11.422680 1.676026
order_iso8j 11.536082 1.714290
order_psh5j 11.381443 1.862141
order_psh8j 11.494845 1.809257
order_lin5j 11.525773 1.594867
order_lin8j 11.639175 1.614940
order_isip5 15.443299 0.499355
order_isip8 15.556701 0.499355
orderc_iso_before_lin 0.484536 0.502357
orderc_iso_before_phase 0.525773 0.501929
orderc_phase_before_lin 0.556701 0.499355

162 rows × 2 columns


In [ ]:
search= "ethnicity"
cols=concat_matches(scales, search)
cols.to_csv('ethnicity.csv')

In [125]:
concat_matches(scales, 'hours').describe().T


Out[125]:
count mean std min 25% 50% 75% max
qmusic_instrumenthours 97 7.927835 27.719107 0 0 0 4 250
qmusic_gamehoursall 97 5.569072 25.235418 0 0 0 3 240
qmusic_gamehoursdrumsticks 97 0.432990 1.396385 0 0 0 0 10
qmusic_singinghours_nonzero 97 0.206186 0.406667 0 0 0 0 1
qmusic_dancehours_nonzero 97 0.432990 0.498063 0 0 0 1 1
qmusic_instrumenthours_nonzero 97 0.422680 0.496552 0 0 0 1 1
qmusic_drumhours_nonzero 97 0.061856 0.242145 0 0 0 0 1
qmusic_gamehoursall_nonzero 97 0.484536 0.502357 0 0 0 1 1
qmusic_gamehoursdrumsticks_nonzero 97 0.195876 0.398935 0 0 0 0 1

In [123]:
len(scales.qmusic_dancelevel[scales.qmusic_dancelevel==0])


Out[123]:
46

In [ ]:
search= "yn$"
cols=concat_matches(scales, search)
#cols.to_csv('qualifiers_yn.csv')
print(search+'\n------')

cols.apply(pd.value_counts).T

In [26]:
dfo.to_csv('csv_dfo_929.csv')

In [6]:
x = dfo.xs('isip5_avgprev3', axis=1).lagdev_avgprev3sq_local
y = dfo.xs('isip8_avgprev3', axis=1).lagdev_avgprev3sq_local

x.corr(y)


Out[6]:
0.64749740485689034

In [7]:
dfscatter = pd.concat([x, y], keys=['isip500local', 'isip800local'], axis=1)

#dfscatter_inverse = 1.0 / dfscatter
dfscatter.plot(x=0, y=1, kind='scatter')

#print((1 / x).corr(1 / y))

#(1 / dfscatter).plot(x=0, y=1, kind='scatter')

dfscatter.sort(columns='isip500local')


Out[7]:
isip500local isip800local
019 1.458044 2.409286
075 1.512254 2.097847
040 1.520219 2.595587
032 1.713234 1.580831
119 1.736570 2.134176
030 1.882929 2.266210
054 1.924084 2.853508
110 1.988980 2.310470
113 2.020436 1.798715
057 2.115470 2.445437
048 2.126246 1.826718
018 2.180362 3.365396
081 2.205461 2.434559
078 2.206225 2.207061
013 2.230631 1.679067
121 2.288308 3.344639
038 2.323221 3.113442
041 2.336903 2.582472
066 2.344056 2.374577
059 2.374586 3.266615
034 2.412615 2.204436
116 2.422717 2.186702
120 2.472802 2.712842
098 2.520819 2.484190
027 2.545177 2.032936
112 2.548912 3.962077
107 2.553538 3.174529
011 2.561063 2.819736
091 2.575460 2.433766
061 2.584169 2.327133
... ... ...
108 3.266065 2.781837
102 3.287046 1.881973
092 3.349463 2.871268
026 3.356011 1.784634
118 3.365157 3.464296
085 3.379907 3.044101
020 3.411905 4.007090
097 3.421431 2.735617
062 3.432983 4.354812
086 3.493859 3.497730
037 3.518156 2.877774
055 3.520752 2.811153
090 3.530500 2.866428
044 3.543828 3.340074
046 3.574300 3.018440
021 3.592341 2.720564
087 3.655366 2.490506
012 3.720961 2.964895
079 3.901361 5.243465
094 4.091857 2.603532
071 4.112043 4.450198
072 4.185346 3.492831
067 4.214285 3.106370
073 4.296935 4.520292
065 4.406004 4.210819
036 4.641768 5.366874
114 4.898744 4.989202
069 4.905948 4.665402
077 4.959826 4.180945
049 5.224991 7.989958

102 rows × 2 columns


In [10]:
lookup = lambda text: [c for c in dfo_flat.columns 
                       if text in c]
lookup2 = lambda tup: set(lookup(tup[0])).intersection(set(lookup(tup[1])))
lookup2(('isip5', 'ints_count'))


Out[10]:
{'isip5_avgprev2_x_ints_count',
 'isip5_avgprev3_x_ints_count',
 'isip5_avgprev4_x_ints_count',
 'isip5_avgprev_12_x_ints_count',
 'isip5_lag2_x_ints_count'}

In [18]:
compare = pd.concat([scales, sms_jitter, sms_ticks, dfo_isip8], axis=1)

#scales.append(sms_jitter) #['tick_index'] = sms_ticks_index
#scales.append(sms_ticks)
#scales['jit_index'] = sms_jitter_index
compare


Out[18]:
session_day session_time session_isfemale exclusion_jitterlinearmissing exclusion_rhythmadminerror sex_femalezero participant_age calc_wasivocab_totalrawscore calc_wasimatrix_totalscore calc_agecolumn ... ints_count ints_mean ints_variance ints_stdev lag2devsq_sum lag2devsq_count lag2devsq_mean lag2devsq_local_sq_abs lag2devsq_local lag2devsq_drift
011 20140226 2:40pm 1 1 0 0 19 40 24 2 ... 128 776.994000 883.209799 29.718846 208781.623552 126 1656.997012 828.498506 3.704488 0.951965
012 20140226 4:00pm 1 1 0 0 57 33 18 8 ... 120 831.791733 986.297632 31.405376 176171.441216 118 1492.978315 746.489158 3.284711 1.861734
013 20140227 8:00am 0 1 1 1 21 42 20 3 ... 114 834.087930 338.276056 18.392283 60632.582384 106 572.005494 286.002747 2.027558 0.866818
015 20140228 1:00pm 1 0 0 0 21 37 15 3 ... 137 702.533956 2430.437702 49.299470 302872.981344 131 2312.007491 1156.003746 4.839632 5.081493
016 20140303 9:10am 0 0 0 1 19 48 22 2 ... 112 811.894107 887.776090 29.795572 169532.034352 108 1569.741059 784.870529 3.450640 1.249453
017 20140303 10:30am 0 0 0 1 23 41 23 3 ... 109 844.185541 1740.213950 41.715872 222698.851904 107 2081.297681 1040.648841 3.821327 3.133114
018 20140303 1:00pm 0 0 0 1 19 37 22 2 ... 114 807.715930 1034.252108 32.159790 260203.336288 112 2323.244074 1161.622037 4.219624 NaN
019 20140303 2:20pm 0 0 0 1 19 40 23 2 ... 116 794.750414 676.364586 26.007010 129798.102080 114 1138.579843 569.289921 3.002175 1.302005
020 20140303 3:37pm 0 0 0 1 18 41 20 2 ... 111 819.063459 2114.370996 45.982290 308034.627472 107 2878.828294 1439.414147 4.632072 3.171907
021 20140304 9:40am 1 0 0 0 19 39 17 2 ... 121 761.102909 1388.895981 37.267895 140437.019936 119 1180.143025 590.071512 3.191604 3.713490
022 20140304 12:30nn 1 0 0 0 23 42 19 3 ... 109 823.375670 511.808289 22.623180 66120.177264 105 629.715974 314.857987 2.155060 1.704434
024 20140304 3:20pm 1 0 0 0 18 33 23 2 ... 124 741.422387 903.318885 30.055264 147698.985472 122 1210.647422 605.323711 3.318396 2.328300
025 20140304 4:50pm 0 0 0 1 21 36 20 3 ... 105 873.708838 2234.780431 47.273464 125167.630608 103 1215.219715 607.609857 2.821277 4.616893
026 20140305 8:00am 0 0 0 1 20 35 23 3 ... 117 787.017641 326.880811 18.079845 59249.180048 115 515.210261 257.605131 2.039355 1.057562
027 20140305 9:10am 0 0 0 1 19 39 22 2 ... 112 822.994571 522.639055 22.861300 97856.858672 110 889.607806 444.803903 2.562638 1.071990
028 20140305 3:40pm 0 0 0 1 18 44 23 2 ... 122 753.348820 667.002049 25.826383 133482.370400 120 1112.353087 556.176543 3.130475 1.397410
029 20140306 8:20am 1 0 0 0 20 39 24 3 ... 115 804.279235 1867.368872 43.213064 394605.813712 113 3492.086847 1746.043424 5.195420 1.369522
030 20140306 12:40nn 1 0 0 0 43 46 23 6 ... 108 853.777815 806.482565 28.398637 120379.428128 106 1135.654982 567.827491 2.791023 1.809425
032 20140306 3:30pm 1 0 0 0 18 41 23 2 ... 117 792.519453 280.617415 16.751639 40211.747744 115 349.667372 174.833686 1.668409 1.297775
033 20140307 1:00pm 0 0 0 1 19 43 24 2 ... 116 795.213966 677.040070 26.019994 141114.364880 114 1237.845306 618.922653 3.128486 0.958670
034 20140307 2:20pm 0 0 0 1 19 39 21 2 ... 114 812.306596 513.101841 22.651751 98116.291088 112 876.038313 438.019157 2.576479 1.066719
035 20140310 10:30am 1 0 0 0 20 42 22 3 ... 116 778.160310 1199.769973 34.637696 236568.727616 112 2112.220782 1056.110391 4.176241 1.540275
036 20140310 2:20pm 1 0 0 0 19 32 19 2 ... 129 703.468155 2774.862029 52.676959 571834.011472 125 4574.672092 2287.336046 6.798617 3.138733
037 20140311 9:40am 1 0 0 0 18 41 28 2 ... 119 775.521647 1463.407121 38.254505 139151.014144 117 1189.324907 594.662454 3.144426 3.800600
038 20140311 3:10pm 0 0 0 1 18 41 22 2 ... 112 807.224500 981.703129 31.332142 141240.152480 108 1307.779190 653.889595 3.167801 2.242947
039 20140312 12:00nn 0 0 0 1 21 27 25 3 ... 120 763.926033 950.985383 30.838051 212786.534608 118 1803.275717 901.637859 3.930653 0.919563
040 20140325 5:10pm 0 0 0 1 21 39 26 3 ... 118 765.104644 914.416946 30.239328 109097.011056 114 956.991325 478.495663 2.859026 2.728872
041 20140326 1:00pm 1 0 0 0 23 40 23 3 ... 119 770.669849 579.816556 24.079380 102163.465424 117 873.192012 436.596006 2.711262 1.552867
043 20140328 11:00am 1 0 0 0 19 29 15 2 ... 132 701.174636 1999.840545 44.719577 188007.440288 130 1446.211079 723.105540 3.835082 5.095939
044 20140331 10:30am 1 0 0 0 18 40 22 2 ... 111 827.880505 1191.182247 34.513508 164717.853136 109 1511.172965 755.586482 3.320278 2.521008
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
091 20140430 10:30am 1 0 0 0 23 40 23 3 ... 131 704.692244 760.246930 27.572576 120765.580960 129 936.167294 468.083647 3.070169 2.425567
092 20140430 12:00nn 1 0 0 0 20 36 21 3 ... 120 764.673100 1090.989115 33.030124 146268.311696 118 1239.561964 619.780982 3.255692 2.838772
093 20140430 1:30pm 1 0 0 0 19 34 20 2 ... 117 779.269573 886.952485 29.781748 135531.019856 113 1199.389556 599.694778 3.142515 2.174944
094 20140501 7:35am 1 0 0 0 22 35 20 3 ... 81 945.213432 1126.350208 33.561141 103098.920160 60 1718.315336 859.157668 3.101034 1.729348
095 20140501 9:00am 0 0 0 1 19 43 21 2 ... 115 795.117287 774.746157 27.834262 103241.385440 113 913.640579 456.820290 2.688076 2.242496
096 20140501 10:35am 1 0 0 0 22 40 26 3 ... 105 874.146171 1060.140879 32.559805 154800.927216 103 1502.921623 751.460812 3.135950 2.009880
097 20140502 9:00am 0 0 0 1 19 39 21 2 ... 115 800.441391 824.802565 28.719376 168980.032944 113 1495.398522 747.699261 3.416126 1.097001
098 20140502 10:25am 0 0 0 1 21 38 26 3 ... 118 777.291593 567.512488 23.822521 130337.320464 116 1123.597590 561.798795 3.049344 0.307521
099 20140502 1:00pm 0 0 0 1 19 36 18 2 ... 124 741.676419 2017.520730 44.916820 287594.460320 122 2357.331642 1178.665821 4.628933 3.905071
100 20140502 2:50pm 1 0 0 0 18 39 25 2 ... 118 775.535661 1054.056078 32.466230 173694.763488 116 1497.368651 748.684325 3.528155 2.253267
101 20140502 5:35pm 0 0 0 1 19 40 19 2 ... 126 726.387333 1340.031487 36.606441 134605.189536 124 1085.525722 542.762861 3.207279 3.887175
102 20140503 9:30am 1 0 0 0 18 41 21 2 ... 114 804.779053 531.380717 23.051697 72300.804720 112 645.542899 322.771450 2.232394 1.794693
103 20140503 10:50am 0 0 0 1 21 41 20 3 ... 116 790.648517 869.395279 29.485510 160461.065344 114 1407.553205 703.776602 3.355320 1.627688
104 20140505 1:30pm 1 0 0 0 21 29 21 3 ... 113 799.264850 774.593421 27.831518 117943.966224 109 1082.054736 541.027368 2.910174 1.912116
105 20140505 5:50pm 1 0 0 0 19 37 22 2 ... 123 744.315415 1875.171194 43.303247 273473.501568 121 2260.111583 1130.055792 4.516406 3.667370
107 20140506 12:00nn 0 0 0 1 20 43 21 3 ... 113 810.179221 935.669667 30.588718 192996.431600 111 1738.706591 869.353295 3.639293 1.005146
108 20140507 12:00nn 1 0 0 0 19 37 19 2 ... 118 779.401288 886.402550 29.772513 160667.758368 116 1385.066882 692.533441 3.376442 1.786460
109 20140507 1:30pm 1 0 0 0 21 34 22 3 ... 122 748.960230 1078.115928 32.834676 232252.546624 120 1935.437889 967.718944 4.153516 1.402878
110 20140507 3:00pm 0 0 0 1 19 47 21 2 ... 114 807.640421 530.966500 23.042710 113958.267760 112 1017.484534 508.742267 2.792742 0.583708
111 20140507 5:00pm 1 0 0 0 20 41 26 3 ... 103 896.778214 1346.015472 36.688084 180413.440000 101 1786.271683 893.135842 3.332527 2.373047
112 20140508 12:20nn 1 0 0 0 20 30 19 3 ... 125 730.919936 2441.423644 49.410764 332315.308624 123 2701.750477 1350.875238 5.028493 4.518067
113 20140508 2:00pm 0 0 0 1 19 33 20 2 ... 114 807.840737 830.985874 28.826826 59596.038976 112 532.107491 266.053745 2.019105 2.942201
114 20140508 5:00pm 1 0 0 0 19 44 25 2 ... 117 760.652444 1941.435481 44.061724 309333.439024 111 2786.787739 1393.393869 4.907391 3.077659
115 20140508 6:30pm 1 0 0 0 19 47 22 2 ... 119 777.876975 492.878399 22.200865 108679.512016 117 928.884718 464.442359 2.770480 0.685526
116 20140509 7:30am 0 0 0 1 20 39 27 3 ... 113 794.877381 431.205669 20.765492 115885.327472 109 1063.168142 531.584071 2.900587 NaN
117 20140509 10:30am 1 0 0 0 19 34 20 2 ... 78 931.637744 1463.459871 38.255194 127423.589056 56 2275.421233 1137.710617 3.620502 1.937290
118 20140509 12:00nn 0 0 0 1 20 42 22 3 ... 109 831.885431 1311.735249 36.217886 243292.114656 105 2317.067759 1158.533879 4.091578 1.487880
119 20140509 3:00pm 1 0 0 0 19 38 23 2 ... 111 832.191856 550.818229 23.469517 99282.943600 109 910.852694 455.426347 2.564399 1.173633
120 20140509 4:30pm 0 0 0 1 24 33 24 3 ... 121 758.141686 666.742129 25.821350 148368.920576 119 1246.797652 623.398826 3.293310 0.868382
121 20140509 7:30pm 1 0 0 0 18 36 20 2 ... 105 870.896114 1675.054895 40.927435 267651.276272 103 2598.556080 1299.278040 4.138898 2.225864

102 rows × 235 columns