In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Apply matplotlib's 'ggplot' stylesheet so every figure drawn below shares one look.
plt.style.use('ggplot')
import dateutil.parser
import pg8000
from pandas import DataFrame
In [2]:
# Input CSV, resolved relative to the notebook's working directory.
# Later cells read its 'Gender', 'Height' and 'Weight' columns.
DATA_FILE = "heights_weights_genders.csv"
df = pd.read_csv(DATA_FILE)
In [3]:
# Preview the first rows as a quick check that the CSV loaded as expected.
df.head()
Out[3]:
In [14]:
# Rows whose 'Gender' value is exactly 'Male'; `df` itself is left untouched.
is_male = df['Gender'].eq('Male')
male = df.loc[is_male]
In [15]:
# Median of the 'Height' column across the male subset.
male.loc[:, 'Height'].median()
Out[15]:
In [16]:
# Median of the 'Weight' column across the male subset.
male.loc[:, 'Weight'].median()
Out[16]:
In [17]:
# Height-vs-weight scatter for the male subset, labelled through the Axes
# object returned by pandas rather than the implicit pyplot "current axes".
ax = male.plot.scatter(x='Height', y='Weight')
ax.set_xlabel('Heights of Male Population')
ax.set_ylabel('Weights of Male Population')
Out[17]:
In [25]:
# Box plot of the male subset's columns, drawn on a single set of axes.
# NOTE(review): 'Height' and 'Weight' would then share one y-axis; if their
# value ranges differ a lot, one box will look squashed - confirm this is intended.
male.boxplot()
Out[25]:
In [18]:
# Rows whose 'Gender' value is exactly 'Female'; `df` itself is left untouched.
is_female = df['Gender'].eq('Female')
female = df.loc[is_female]
In [19]:
# Median of the 'Height' column across the female subset.
female.loc[:, 'Height'].median()
Out[19]:
In [20]:
# Median of the 'Weight' column across the female subset.
female.loc[:, 'Weight'].median()
Out[20]:
In [21]:
# Height-vs-weight scatter for the female subset.
# The axis labels previously read "Male Population" (copied from the male
# plot), which mislabelled this figure; they now name the plotted subset.
female.plot(kind='scatter', x='Height', y='Weight')
plt.xlabel('Heights of Female Population')
plt.ylabel('Weights of Female Population')
Out[21]:
In [9]:
# Pearson correlation matrix for the male subset.
# `male` still carries the string-valued 'Gender' column; on pandas >= 2.0
# DataFrame.corr no longer drops non-numeric columns silently and raises on
# them, so restrict the frame to numeric columns before correlating.
male.select_dtypes(include='number').corr(method='pearson', min_periods=1)
Out[9]:
In [24]:
# Box plot of the female subset's columns, drawn on a single set of axes.
# NOTE(review): 'Height' and 'Weight' would then share one y-axis; if their
# value ranges differ a lot, one box will look squashed - confirm this is intended.
female.boxplot()
Out[24]:
In [ ]: