Initial commands


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import redcaputils
import statsmodels.formula.api as smf
%matplotlib inline
pd.set_option('display.max_columns', None)

In [2]:
# Read the two cohort files and stack them row-wise into one frame.
patients, controls = (
    pd.read_csv(csv_name) for csv_name in ("patients.csv", "controls.csv")
)
df = pd.concat([patients, controls])

Regression models


In [3]:
# Columns exported with a "_v2" suffix are mapped to the same name without it.
_v2_stems = [
    'bdi', 'stai_i', 'stai_ii', 'fss', 'ess', 'vas_now', 'vas_4wk_aver', 'scc',
    'eq5d3l', 'eq5d3l_vas', 'bfi_e', 'bfi_p', 'bfi_s', 'bfi_n', 'bfi_o', 'sf_12',
]
# Old column name -> new column name; the two entries outside the "_v2" pattern
# are added explicitly so the insertion order matches the hand-written mapping.
cols_rename = {'redcap_event_name': 'PATIENT'}
cols_rename.update((stem + '_v2', stem) for stem in _v2_stems)
cols_rename['f_phq_suma'] = 'f_phq'

# Raw cell value -> numeric code.
values_changes = {
    'F': 0,
    'M': 1,
    'enrollment_arm_1': 1,
    'enrollment_arm_2': 0,
}
# Hand the combined frame to the project helper together with the column-rename
# and value-recoding mappings; the result replaces `df`.
df = redcaputils.convert_dataframe(
    df,
    columns_conversion_dict=cols_rename,
    values_conversion_dict=values_changes,
)

In [4]:
# OLS with sf_12 as the response and the 22 terms below as regressors,
# fitted only on rows where PATIENT == 1.
# NOTE(review): the recorded output shows 24 observations for 22 regressors
# (1 residual degree of freedom) — confirm this near-saturated fit is intended.
sf12_terms = [
    'age', 'pohlavi', 'duration', 's_fmdrs_sum', 'rls_dg', 'bdi',
    'stai_i', 'stai_ii', 'fss', 'ess', 'vas_now', 'vas_4wk_aver',
    'scc', 'eq5d3l', 'eq5d3l_vas', 'bfi_e', 'bfi_p', 'bfi_s',
    'bfi_n', 'bfi_o', 'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
sf12_formula = "sf_12 ~\n" + "\n+ ".join(sf12_terms) + "\n"
patient_rows = df[df['PATIENT'] == 1]
model = smf.ols(formula=sf12_formula, data=patient_rows)
result = model.fit()
result.summary()


Out[4]:
OLS Regression Results
Dep. Variable: sf_12 R-squared: 0.997
Model: OLS Adj. R-squared: 0.941
Method: Least Squares F-statistic: 17.62
Date: Tue, 30 May 2017 Prob (F-statistic): 0.186
Time: 21:18:39 Log-Likelihood: -5.8866
No. Observations: 24 AIC: 57.77
Df Residuals: 1 BIC: 84.87
Df Model: 22
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept -2.1966 28.028 -0.078 0.950 -358.332 353.939
age 0.2576 0.118 2.175 0.274 -1.247 1.762
pohlavi 9.5727 7.045 1.359 0.404 -79.944 99.089
duration 0.1354 0.174 0.778 0.579 -2.075 2.345
s_fmdrs_sum 0.1015 0.331 0.307 0.811 -4.102 4.305
rls_dg -0.2703 3.634 -0.074 0.953 -46.449 45.908
bdi -0.4496 0.231 -1.947 0.302 -3.384 2.484
stai_i 0.1493 0.142 1.050 0.484 -1.658 1.957
stai_ii -0.4834 0.249 -1.945 0.302 -3.642 2.675
fss -2.5471 1.552 -1.641 0.348 -22.270 17.176
ess 0.9083 0.334 2.718 0.224 -3.338 5.154
vas_now 0.5326 0.374 1.424 0.390 -4.219 5.284
vas_4wk_aver 0.1866 0.679 0.275 0.829 -8.440 8.813
scc -0.9272 0.639 -1.452 0.384 -9.041 7.187
eq5d3l 1.4126 0.886 1.594 0.357 -9.847 12.672
eq5d3l_vas 0.1612 0.072 2.226 0.269 -0.759 1.081
bfi_e 1.2876 3.584 0.359 0.780 -44.254 46.830
bfi_p 3.8179 2.950 1.294 0.419 -33.660 41.296
bfi_s -1.3482 3.404 -0.396 0.760 -44.602 41.905
bfi_n 5.9076 3.018 1.958 0.301 -32.437 44.252
bfi_o 2.1493 2.667 0.806 0.568 -31.733 36.032
f_phq 3.9604 2.092 1.893 0.309 -22.626 30.546
f_phq_somatic -4.3481 2.225 -1.954 0.301 -32.620 23.924
Omnibus: 2.333 Durbin-Watson: 1.734
Prob(Omnibus): 0.311 Jarque-Bera (JB): 1.941
Skew: 0.583 Prob(JB): 0.379
Kurtosis: 2.237 Cond. No. 9.53e+03

In [5]:
# OLS with s_fmdrs_sum as the response and the 22 terms below as regressors,
# fitted only on rows where PATIENT == 1.
# NOTE(review): the recorded output shows 24 observations for 22 regressors
# (1 residual degree of freedom) — confirm this near-saturated fit is intended.
fmdrs_terms = [
    'age', 'pohlavi', 'duration', 'sf_12', 'rls_dg', 'bdi',
    'stai_i', 'stai_ii', 'fss', 'ess', 'vas_now', 'vas_4wk_aver',
    'scc', 'eq5d3l', 'eq5d3l_vas', 'bfi_e', 'bfi_p', 'bfi_s',
    'bfi_n', 'bfi_o', 'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
fmdrs_formula = "s_fmdrs_sum ~\n" + "\n+ ".join(fmdrs_terms) + "\n"
patient_rows = df[df['PATIENT'] == 1]
model = smf.ols(formula=fmdrs_formula, data=patient_rows)
result = model.fit()
result.summary()


Out[5]:
OLS Regression Results
Dep. Variable: s_fmdrs_sum R-squared: 0.987
Model: OLS Adj. R-squared: 0.696
Method: Least Squares F-statistic: 3.390
Date: Tue, 30 May 2017 Prob (F-statistic): 0.407
Time: 21:18:40 Log-Likelihood: -31.358
No. Observations: 24 AIC: 108.7
Df Residuals: 1 BIC: 135.8
Df Model: 22
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept -49.9361 64.100 -0.779 0.579 -864.408 764.535
age -0.3071 0.760 -0.404 0.755 -9.959 9.345
pohlavi -25.8415 22.633 -1.142 0.458 -313.424 261.741
duration -0.1014 0.629 -0.161 0.898 -8.092 7.889
sf_12 0.8475 2.763 0.307 0.811 -34.261 35.956
rls_dg -6.7367 8.097 -0.832 0.558 -109.617 96.143
bdi 0.7556 1.250 0.604 0.654 -15.127 16.639
stai_i -0.3414 0.489 -0.699 0.612 -6.550 5.867
stai_ii 0.9506 1.251 0.760 0.586 -14.942 16.843
fss 5.7896 6.387 0.906 0.531 -75.370 86.949
ess -1.2973 2.478 -0.524 0.693 -32.786 30.191
vas_now -1.1076 1.520 -0.729 0.599 -20.420 18.205
vas_4wk_aver 0.1612 2.029 0.079 0.950 -25.614 25.937
scc -0.2900 3.241 -0.089 0.943 -41.469 40.888
eq5d3l 0.0995 4.818 0.021 0.987 -61.124 61.323
eq5d3l_vas -0.2049 0.468 -0.438 0.737 -6.150 5.740
bfi_e 8.1954 7.348 1.115 0.465 -85.172 101.562
bfi_p -8.2259 11.259 -0.731 0.598 -151.283 134.832
bfi_s 7.6508 7.311 1.047 0.486 -85.239 100.541
bfi_n -4.5403 18.627 -0.244 0.848 -241.222 232.142
bfi_o -2.9867 9.438 -0.316 0.805 -122.902 116.929
f_phq -5.5649 11.688 -0.476 0.717 -154.079 142.949
f_phq_somatic 6.5845 12.487 0.527 0.691 -152.076 165.245
Omnibus: 1.554 Durbin-Watson: 2.157
Prob(Omnibus): 0.460 Jarque-Bera (JB): 0.989
Skew: -0.496 Prob(JB): 0.610
Kurtosis: 2.919 Cond. No. 8.03e+03

In [6]:
# OLS with eq5d3l as the response and the 22 terms below as regressors,
# fitted only on rows where PATIENT == 1.
# NOTE(review): the recorded output shows 24 observations for 22 regressors
# (1 residual degree of freedom) — confirm this near-saturated fit is intended.
eq5d3l_terms = [
    'age', 'pohlavi', 'duration', 'sf_12', 'rls_dg', 'bdi',
    'stai_i', 'stai_ii', 'fss', 'ess', 'vas_now', 'vas_4wk_aver',
    'scc', 's_fmdrs_sum', 'eq5d3l_vas', 'bfi_e', 'bfi_p', 'bfi_s',
    'bfi_n', 'bfi_o', 'f_phq', 'f_phq_somatic',
]
# One term per line, each continuation line starting with "+ ".
eq5d3l_formula = "eq5d3l ~\n" + "\n+ ".join(eq5d3l_terms) + "\n"
patient_rows = df[df['PATIENT'] == 1]
model = smf.ols(formula=eq5d3l_formula, data=patient_rows)
result = model.fit()
result.summary()


Out[6]:
OLS Regression Results
Dep. Variable: eq5d3l R-squared: 0.986
Model: OLS Adj. R-squared: 0.680
Method: Least Squares F-statistic: 3.222
Date: Tue, 30 May 2017 Prob (F-statistic): 0.417
Time: 21:18:40 Log-Likelihood: 6.3855
No. Observations: 24 AIC: 33.23
Df Residuals: 1 BIC: 60.32
Df Model: 22
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 8.3363 14.655 0.569 0.671 -177.872 194.545
age -0.1399 0.097 -1.450 0.384 -1.367 1.087
pohlavi -4.1660 5.784 -0.720 0.603 -77.654 69.322
duration -0.0600 0.118 -0.510 0.700 -1.556 1.436
sf_12 0.5080 0.319 1.594 0.357 -3.541 4.557
rls_dg 0.8228 2.025 0.406 0.754 -24.903 26.549
bdi 0.2089 0.220 0.952 0.516 -2.581 2.999
stai_i -0.0640 0.106 -0.605 0.654 -1.409 1.281
stai_ii 0.2271 0.234 0.971 0.509 -2.745 3.199
fss 1.1611 1.361 0.853 0.550 -16.128 18.450
ess -0.4726 0.337 -1.403 0.394 -4.754 3.809
vas_now -0.2369 0.310 -0.764 0.585 -4.177 3.704
vas_4wk_aver -0.1488 0.395 -0.376 0.771 -5.170 4.872
scc 0.5145 0.437 1.177 0.448 -5.040 6.069
s_fmdrs_sum 0.0043 0.207 0.021 0.987 -2.632 2.640
eq5d3l_vas -0.0932 0.050 -1.848 0.316 -0.734 0.548
bfi_e -1.3572 1.837 -0.739 0.595 -24.698 21.984
bfi_p -1.9646 2.124 -0.925 0.525 -28.952 25.022
bfi_s 0.1214 2.192 0.055 0.965 -27.735 27.978
bfi_n -3.2520 2.291 -1.419 0.391 -32.368 25.864
bfi_o -0.9282 1.832 -0.507 0.701 -24.209 22.353
f_phq -1.9355 1.863 -1.039 0.488 -25.601 21.730
f_phq_somatic 2.1151 2.026 1.044 0.486 -23.631 27.861
Omnibus: 2.103 Durbin-Watson: 2.413
Prob(Omnibus): 0.349 Jarque-Bera (JB): 1.704
Skew: -0.628 Prob(JB): 0.427
Kurtosis: 2.647 Cond. No. 8.86e+03