In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
%matplotlib inline
In [2]:
# Load the height/weight/gender table, print its (rows, columns) shape,
# and display the column index as the cell's output.
csv_path = 'data/heights_weights_genders.csv'
df = pd.read_csv(csv_path)
print(df.shape)
df.columns
Out[2]:
In [3]:
# Scatter plot of Weight against Height, one marker series per Gender value.
plt.style.use('fivethirtyeight')
fig, ax = plt.subplots(figsize=(8, 4))

# Plot each gender as its own series so it gets a distinct colour and a
# legend entry; linestyle='' turns the line plot into a pure scatter.
for category, group in df.groupby('Gender'):
    ax.plot(
        group['Height'],
        group['Weight'],
        marker='o',
        linestyle='',
        label=category,
        markersize=10,
        markeredgewidth=0,
        alpha=.5,
    )

# Label the figure so it can be read on its own when the notebook is skimmed.
# NOTE(review): units are not visible in this notebook — add them once confirmed.
ax.set_xlabel('Height')
ax.set_ylabel('Weight')
ax.set_title('Weight vs. height by gender')

# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc='upper left');
In [4]:
# Preview the first five rows of the loaded table (head() defaults to 5 rows).
df.head()
Out[4]:
In [5]:
# Pearson correlation between Height and Weight, within each gender and overall.
#
# The correlation is computed directly from the two numeric columns with
# Series.corr. Calling DataFrame.corr() on the full frame also feeds it the
# string-valued 'Gender' column, which raises ValueError on pandas >= 2.0
# (numeric_only now defaults to False).
for gender in ('Male', 'Female'):
    subset = df[df['Gender'] == gender]
    print(f"{gender} correlation:", subset['Height'].corr(subset['Weight']))

print("Whole dataset correlation:", df['Height'].corr(df['Weight']))
print("Simpson's paradox!?")