In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import scipy.stats
import statsmodels.formula.api as smf

import redcaputils
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)
In [2]:
# Read the patient and control tables from CSV files located in the
# notebook's working directory.
patients, controls = (
    pd.read_csv(csv_name) for csv_name in ("patients.csv", "controls.csv")
)
In [3]:
# Stack patients and controls into a single frame.
# Both inputs were read with read_csv's default positional index, so a plain
# concat would leave duplicate index labels (0..n-1 appearing twice);
# ignore_index=True gives every row a unique label.
df = pd.concat([patients, controls], ignore_index=True)
In [4]:
# Column-name mapping handed to redcaputils.convert_dataframe: the event column
# is mapped to PATIENT and the remaining entries drop the "_v2" / "_suma" suffix.
cols_rename = {
    'redcap_event_name': 'PATIENT',
    'bdi_v2': 'bdi',
    'stai_i_v2': 'stai_i',
    'stai_ii_v2': 'stai_ii',
    'fss_v2': 'fss',
    'ess_v2': 'ess',
    'vas_now_v2': 'vas_now',
    'vas_4wk_aver_v2': 'vas_4wk_aver',
    'scc_v2': 'scc',
    'eq5d3l_v2': 'eq5d3l',
    'eq5d3l_vas_v2': 'eq5d3l_vas',
    'bfi_e_v2': 'bfi_e',
    'bfi_p_v2': 'bfi_p',
    'bfi_s_v2': 'bfi_s',
    'bfi_n_v2': 'bfi_n',
    'bfi_o_v2': 'bfi_o',
    'sf_12_v2': 'sf_12',
    'f_phq_suma': 'f_phq',
}

# Raw value -> numeric code mapping handed to redcaputils.convert_dataframe.
# NOTE(review): presumably 'F'/'M' recode the sex column and the enrollment
# arms recode PATIENT (arm_1 -> 1, arm_2 -> 0) -- confirm against the helper.
values_changes = {
    'F': 0,
    'M': 1,
    'enrollment_arm_1': 1,
    'enrollment_arm_2': 0,
}

# df is rebound to the helper's result, so re-running this cell feeds the
# already-converted frame back into convert_dataframe.
df = redcaputils.convert_dataframe(
    df,
    columns_conversion_dict=cols_rename,
    values_conversion_dict=values_changes
)
#df.head()
In [44]:
# Chi-square test of independence between PATIENT and pohlavi.
# obs is a 2x2 table of row counts: rows are PATIENT == 0 / 1,
# columns are pohlavi == 0 / 1.
obs = [
    [((df.PATIENT == group) & (df.pohlavi == level)).sum() for level in (0, 1)]
    for group in (0, 1)
]
# For a 2x2 table chi2_contingency applies Yates' continuity correction by default.
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(p)
In [45]:
# Chi-square test of independence between PATIENT and rls_dg.
# obs is a 2x2 table of row counts: rows are PATIENT == 0 / 1,
# columns are rls_dg == 0 / 1.
obs = [
    [((df.PATIENT == group) & (df.rls_dg == level)).sum() for level in (0, 1)]
    for group in (0, 1)
]
# For a 2x2 table chi2_contingency applies Yates' continuity correction by default.
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(p)