In [1]:
import pandas as pd
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [3]:
df = pd.read_csv("heights_weights_genders.csv")

In [4]:
df.head()


Out[4]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [93]:
lm = smf.ols(formula="Weight ~ Height + Gender",data=df).fit()

In [94]:
lm.params


Out[94]:
Intercept        -244.923503
Gender[T.Male]     19.377711
Height              5.976941
dtype: float64

In [106]:
def guess_weight(height, gender):
    for item in df['Weight']:
        intercept = -244.923503
        male = 19.377711
        if gender == 'Male':
            weight = height * 5.97 + intercept + male
        else:
            weight = height * 5.97 + intercept
    return round(weight)

In [107]:
guess_weight(70, 'Male')


Out[107]:
192

In [108]:
guess_weight(70, 'Female')


Out[108]:
173