Using heights_weights_genders.csv, analyze how the correlation between height and weight differs between women and men.
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to figures created after this call.
plt.style.use('ggplot')
In [3]:
# Read the measurements CSV (path is relative to the notebook's working
# directory) into the frame that every later cell works from.
df = pd.read_csv(filepath_or_buffer="data/heights_weights_genders.csv")
In [4]:
# Non-null value count of every remaining column, one row per Gender value.
df.groupby(by='Gender').count()
Out[4]:
In [5]:
# Height against weight for every row in the dataset. The title makes this
# figure distinguishable from the per-gender scatters when the notebook is
# skimmed; axis labels come from the column names.
df.plot.scatter(x='Weight', y='Height', title='Height vs. weight (all records)')
Out[5]:
In [6]:
# Height against weight for the rows whose Gender is 'Male'. The title is
# needed because the figure is otherwise identical in form to the other
# scatter plots in this notebook.
df[df['Gender'] == 'Male'].plot.scatter(x='Weight', y='Height', title='Height vs. weight (Male)')
Out[6]:
In [7]:
# Height against weight for the rows whose Gender is 'Female'. The title is
# needed because the figure is otherwise identical in form to the other
# scatter plots in this notebook.
df[df['Gender'] == 'Female'].plot.scatter(x='Weight', y='Height', title='Height vs. weight (Female)')
Out[7]:
In [8]:
# Pairwise correlation matrix for the 'Female' rows.
# The filtered frame still carries the string-valued Gender column, and
# pandas >= 2.0 raises on non-numeric columns instead of silently dropping
# them, so keep only the numeric columns before calling corr().
df[df['Gender'] == 'Female'].select_dtypes(include='number').corr()
Out[8]:
In [9]:
# Pairwise correlation matrix for the 'Male' rows.
# The filtered frame still carries the string-valued Gender column, and
# pandas >= 2.0 raises on non-numeric columns instead of silently dropping
# them, so keep only the numeric columns before calling corr().
df[df['Gender'] == 'Male'].select_dtypes(include='number').corr()
Out[9]:
In [10]:
# First quartile (25th percentile) of each numeric column for 'Female' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# skips non-numeric columns in quantile() and raises on them instead.
df[df['Gender'] == 'Female'].select_dtypes(include='number').quantile(.25)
Out[10]:
In [11]:
# Median (50th percentile) of each numeric column for 'Female' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# skips non-numeric columns in quantile() and raises on them instead.
df[df['Gender'] == 'Female'].select_dtypes(include='number').quantile(.5)
Out[11]:
In [12]:
# Third quartile (75th percentile) of each numeric column for 'Female' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# skips non-numeric columns in quantile() and raises on them instead.
df[df['Gender'] == 'Female'].select_dtypes(include='number').quantile(.75)
Out[12]:
In [14]:
# Mean of each numeric column for 'Female' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# drops non-numeric columns from mean() and raises on them instead.
df[df['Gender'] == 'Female'].select_dtypes(include='number').mean()
Out[14]:
In [ ]:
# Working out the standard deviation in weight and height: women first, then men
In [15]:
# Standard deviation of each numeric column for 'Female' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# drops non-numeric columns from std() and raises on them instead.
df[df['Gender'] == 'Female'].select_dtypes(include='number').std()
Out[15]:
In [16]:
# Standard deviation of each numeric column for 'Male' rows.
# The string-valued Gender column is excluded first: pandas >= 2.0 no longer
# drops non-numeric columns from std() and raises on them instead.
df[df['Gender'] == 'Male'].select_dtypes(include='number').std()
Out[16]:
In [ ]: