Analysis of the UCI Acute Inflammations dataset (https://archive.ics.uci.edu/ml/datasets/Acute+Inflammations).
In [1]:
import numpy as np
import pandas as pd
%pylab inline
pylab.style.use('ggplot')
import seaborn as sns
In [2]:
# Load the raw diagnosis table: tab-separated, no header row, and numbers
# written with a comma as the decimal mark.
data_df = pd.read_csv(
    'diagnosis.csv',
    header=None,
    sep='\t',
    decimal=',',
)
In [3]:
# Preview the first rows of the freshly loaded table.
data_df.head(5)
Out[3]:
a1 Temperature of patient { 35C-42C }
a2 Occurrence of nausea { yes, no }
a3 Lumbar pain { yes, no }
a4 Urine pushing (continuous need for urination) { yes, no }
a5 Micturition pains { yes, no }
a6 Burning of urethra, itch, swelling of urethra outlet { yes, no }
d1 decision: Inflammation of urinary bladder { yes, no }
d2 decision: Nephritis of renal pelvis origin { yes, no }
In [4]:
# Short column names: one temperature reading, five yes/no symptoms, then the
# two diagnoses. The spellings 'lumber_pain' and 'micturiation_pain' are kept
# exactly as they are because the model formulas further down refer to them.
column_names = [
    'temp',
    'nausea',
    'lumber_pain',
    'urine_pushing',
    'micturiation_pain',
    'burning',
    'inflammation',
    'nephritis',
]
data_df.columns = column_names
In [5]:
# Encode every column after the leading temperature column as 1.0 / 0.0.
#
# Plain column assignment (`data_df[c] = ...`) is used instead of
# `data_df.loc[:, c] = ...`: on pandas >= 2.0 the `.loc[:, c]` form writes the
# new values into the existing object-dtype column, so the column would keep
# dtype `object` and the formula-based models below would treat it as
# categorical instead of numeric.
for c in data_df.columns[1:]:
    # Skip columns that are already numeric so that re-running this cell does
    # not compare 1.0/0.0 against 'yes' and reset every flag to 0.0.
    if pd.api.types.is_numeric_dtype(data_df[c]):
        continue
    # Same rule as before: 'yes' -> 1.0, anything else -> 0.0.
    data_df[c] = (data_df[c] == 'yes').astype(float)
In [6]:
# Show the first rows again after the yes/no encoding step.
data_df.head(5)
Out[6]:
In [7]:
# Split by position: the last two columns are the diagnoses (targets),
# everything before them is a feature.
feature_columns = data_df.columns[:-2]
target_columns = data_df.columns[-2:]

feature_df = data_df.loc[:, feature_columns]
target_df = data_df.loc[:, target_columns]
In [8]:
# Preview the feature columns.
feature_df.head(5)
Out[8]:
In [9]:
# Preview the two diagnosis columns.
target_df.head(5)
Out[9]:
In [10]:
# Scatter of temperature against the inflammation flag, coloured by the same
# flag; the regression line is switched off so only the points are drawn.
sns.lmplot(
    x='temp',
    y='inflammation',
    hue='inflammation',
    fit_reg=False,
    data=data_df,
)
Out[10]:
In [11]:
# One count plot per feature after the temperature column, with the bars
# split by the inflammation flag. Each plot gets its own new figure.
for fname in feature_df.columns[1:]:
    ax = pylab.figure().gca()
    sns.countplot(data=data_df, y=fname, hue="inflammation", ax=ax)
In [12]:
import statsmodels.formula.api as sm
In [13]:
# Logistic regression of the inflammation flag on three of the symptom flags,
# fitted with the L-BFGS optimiser.
inflammation_formula = 'inflammation ~ lumber_pain + urine_pushing + micturiation_pain'
inflammation_model = sm.logit(inflammation_formula, data=data_df)
inflammation_result = inflammation_model.fit(method='lbfgs')

# Last expression of the cell: render the fit summary.
inflammation_result.summary()
Out[13]:
In [14]:
# Scatter of temperature against the nephritis flag, coloured by the same
# flag; the regression line is switched off so only the points are drawn.
sns.lmplot(
    x='temp',
    y='nephritis',
    hue='nephritis',
    fit_reg=False,
    data=data_df,
)
Out[14]:
In [15]:
# One count plot per feature after the temperature column, with the bars
# split by the nephritis flag. Each plot gets its own new figure.
for fname in feature_df.columns[1:]:
    ax = pylab.figure().gca()
    sns.countplot(data=data_df, y=fname, hue="nephritis", ax=ax)
In [16]:
# Ordinary least squares fit of the nephritis flag on temperature and four
# symptom flags.
# NOTE(review): the target is a 0/1 flag and the inflammation model in this
# notebook uses logit, while this one uses OLS (a linear probability model).
# Confirm that the difference is intentional.
nephritis_formula = 'nephritis ~ temp + nausea + urine_pushing + lumber_pain + burning'
nephritis_model = sm.ols(nephritis_formula, data=data_df)
nephritis_result = nephritis_model.fit()

# Last expression of the cell: render the fit summary.
nephritis_result.summary()
Out[16]: