`%matplotlib inline` allows plots to be displayed inline in the notebook.
In [14]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
* `low`: indicator of birth weight less than 2.5 kg
* `age`: mother's age in years
* `lwt`: mother's weight in pounds at last menstrual period
* `race`: mother's race ("white", "black", "other")
* `smoke`: smoking status during pregnancy
* `ht`: history of hypertension
* `ui`: presence of uterine irritability
* `ftv`: number of physician visits during the first trimester
* `ptl`: number of previous premature labours
* `bwt`: birth weight in grams
In [3]:
# Read the birth-weight CSV from the user's home directory.
df = pd.read_csv("~/low_birth_rate_full.csv")

# Replace the file's header with upper-case column names; this assignment
# raises if the file does not contain exactly eleven columns.
df.columns = [
    "ID", "LOW", "AGE", "LWT", "RACE", "SMOKE",
    "PTL", "HT", "UI", "FTV", "BWT",
]

# Print dtypes, non-null counts and memory usage as a quick sanity check.
df.info()
The `lmplot` method (linear model plot)
In [4]:
# Use seaborn's "ticks" style for every figure drawn from here on.
sns.set(style="ticks")

# Scatter plot of birth weight against mother's age with a fitted regression line.
sns.lmplot(
    data=df,
    x='AGE',
    y='BWT',
)
Out[4]:
In [5]:
# Regress birth weight on mother's weight, with one fitted line per RACE group.
# The plot is created once and the returned grid is kept so it can be saved;
# calling lmplot a second time only rendered a duplicate of the same figure.
p = sns.lmplot(y="BWT",x="LWT",hue="RACE",data=df)

# Write the figure to a PNG file in the current working directory.
p.savefig("fancy-regression-chart.png")
In [6]:
# Distribution of birth weight for each smoking-status group.
sns.violinplot(
    data=df,
    x="SMOKE",
    y="BWT",
)
Out[6]:
In [ ]:
### Plot 'SMOKE' status, infant 'BWT', and hypertension 'HT'
* Qualify difference between smoking and non-smoking mothers, with hypertension
* What happens when we add hypertension into the mix?
In [7]:
# Birth-weight distributions grouped by hypertension history, with one
# violin per smoking status inside each group.
sns.violinplot(
    data=df,
    x="HT",
    y="BWT",
    hue="SMOKE",
)
Out[7]:
In [8]:
# Count of mothers per smoking status, coloured by the low-birth-weight flag,
# with one panel per hypertension value.
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed; the
# arguments are unchanged.
sns.catplot(x="SMOKE",hue="LOW",col="HT",kind="count",data=df)
Out[8]:
In [11]:
import numpy as np

# Pairwise correlations of the numeric (and boolean) columns. Selecting them
# explicitly keeps this working on pandas >= 2.0, where DataFrame.corr() raises
# instead of silently dropping columns that hold text.
corr = df.select_dtypes(include=["number", "bool"]).corr()

# Boolean mask that hides the upper triangle (including the diagonal), since
# the matrix is symmetric. The builtin `bool` is used because the `np.bool`
# alias was removed in NumPy 1.24.
mask = np.triu(np.ones_like(corr, dtype=bool))

f, ax = plt.subplots(figsize=(11, 9))

# Diverging palette so that negative and positive correlations get different hues.
cmap = sns.diverging_palette(220, 10, as_cmap=True)

# center=0 pins the palette midpoint to zero correlation; vmax caps the colour scale.
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})
Out[11]:
In [12]:
# Subset of columns to compare pairwise, including the LOW flag used for colouring.
df1 = df[['AGE','LWT','BWT','FTV','PTL','LOW']]

# Scatter-plot matrix coloured by LOW. `height` replaces the `size` keyword,
# which was renamed in seaborn 0.9 and is no longer accepted. The trailing
# semicolon suppresses the PairGrid repr.
sns.pairplot(df1, hue='LOW', height=2.5);
In [13]:
# Distribution of mother's weight: 20-bin density histogram with a KDE curve
# and a rug of the individual observations.
# `distplot` is deprecated; histplot(kde=True, stat="density") plus rugplot is
# the documented equivalent of distplot(..., rug=True).
ax = sns.histplot(df['LWT'], bins=20, kde=True, stat="density")
sns.rugplot(df['LWT'], ax=ax)
Out[13]:
In [ ]: