Initial commands


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns
import statsmodels.formula.api as smf

import redcaputils
%matplotlib inline
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [2]:
# Read the two cohort tables from CSV files in the current working directory.
patients, controls = (
    pd.read_csv(csv_path) for csv_path in ("patients.csv", "controls.csv")
)

In [3]:
# Stack both cohorts row-wise into one frame. The index is not reset, so the
# row labels of each input table are carried over unchanged.
df = pd.concat(objs=[patients, controls])

Descriptive statistics


In [4]:
# Old column name -> new column name. The event column becomes PATIENT and the
# `_v2` / `_suma` suffixes are stripped from the remaining columns.
cols_rename = {
    'redcap_event_name': 'PATIENT',
    'bdi_v2': 'bdi',
    'stai_i_v2': 'stai_i',
    'stai_ii_v2': 'stai_ii',
    'fss_v2': 'fss',
    'ess_v2': 'ess',
    'vas_now_v2': 'vas_now',
    'vas_4wk_aver_v2': 'vas_4wk_aver',
    'scc_v2': 'scc',
    'eq5d3l_v2': 'eq5d3l',
    'eq5d3l_vas_v2': 'eq5d3l_vas',
    'bfi_e_v2': 'bfi_e',
    'bfi_p_v2': 'bfi_p',
    'bfi_s_v2': 'bfi_s',
    'bfi_n_v2': 'bfi_n',
    'bfi_o_v2': 'bfi_o',
    'sf_12_v2': 'sf_12',
    'f_phq_suma': 'f_phq',
}

# Raw value -> 0/1 code.
# NOTE(review): presumably 'F'/'M' are the values of the sex column and the
# enrollment arms are the values of the event column -- confirm against the data.
values_changes = {
    'F': 0,
    'M': 1,
    'enrollment_arm_1': 1,
    'enrollment_arm_2': 0,
}

# NOTE(review): convert_dataframe is a project helper not visible here; it is
# assumed to apply the column renames and value recodings above -- confirm.
df = redcaputils.convert_dataframe(
    df,
    columns_conversion_dict=cols_rename,
    values_conversion_dict=values_changes,
)

Chi-square test


In [44]:
# 2x2 contingency table of group membership against `pohlavi` (Czech: "sex").
# Rows are PATIENT == 0 then PATIENT == 1; columns are pohlavi == 0 then
# pohlavi == 1. Rows holding any other value (or NaN) in either column are
# not counted.
obs = [
    [int(((df.PATIENT == group) & (df.pohlavi == level)).sum()) for level in (0, 1)]
    for group in (0, 1)
]

# Chi-square test of independence. With SciPy's default `correction=True`,
# Yates' continuity correction is applied to a 2x2 table (one degree of freedom).
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)

# print() with a single argument behaves the same on Python 2 and Python 3.
print(p)


0.434915861161

In [45]:
# 2x2 contingency table of group membership against the `rls_dg` flag.
# Rows are PATIENT == 0 then PATIENT == 1; columns are rls_dg == 0 then
# rls_dg == 1. Rows holding any other value (or NaN) in either column are
# not counted.
obs = [
    [int(((df.PATIENT == group) & (df.rls_dg == level)).sum()) for level in (0, 1)]
    for group in (0, 1)
]

# Chi-square test of independence. With SciPy's default `correction=True`,
# Yates' continuity correction is applied to a 2x2 table (one degree of freedom).
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)

# print() with a single argument behaves the same on Python 2 and Python 3.
print(p)


0.000213556159443