In [2]:
import statsmodels.formula.api as smf #package used for linear regression

Use the data from `heights_weights_genders.csv` to create a simple predictor that takes a person's height and estimates their weight, based on a model fitted to all the data regardless of gender. To do this, find the fitted parameters (`lm.params`) and use those values in your function (i.e., don't fit a new model each time the function is called).


In [4]:
# Load the full height/weight/gender table; the regression below is fitted on this frame.
df = pd.read_csv("heights_weights_genders.csv")

In [5]:
# Preview the first rows to check the columns that were read from the CSV.
df.head()


Out[5]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [6]:
# Ordinary least squares of Weight on Height over every row of df
# (the formula has no gender term, so all rows share one line).
lm = smf.ols(
    data=df,
    formula="Weight~Height",
).fit()
# Fitted coefficients (Intercept and Height slope); shown as the cell output.
lm.params


Out[6]:
Intercept   -350.737192
Height         7.717288
dtype: float64

In [7]:
def predict_weight(height, slope=7.717288, intercept=-350.737192):
    """Estimate a weight from a height using a fitted straight line.

    Evaluates ``intercept + slope * height``. The default coefficients are
    the values displayed by ``lm.params`` for the ``Weight~Height`` fit, so
    calling the function with only ``height`` gives the same result as
    before. Passing ``slope`` / ``intercept`` lets the same function be
    reused with coefficients from a different fit without refitting a model
    on every call.

    Parameters
    ----------
    height : float or any value accepted by ``float()``
        Height to predict for; converted with ``float(height)``.
    slope : float, optional
        Coefficient multiplied by the height.
    intercept : float, optional
        Constant term of the line.

    Returns
    -------
    float
        The predicted weight.

    Raises
    ------
    ValueError, TypeError
        Propagated from ``float(height)`` when ``height`` is not convertible.
    """
    # Keep the original operand order so default-argument results are unchanged.
    return intercept + slope * float(height)

In [12]:
# Add the model's estimate for every row by running predict_weight on each Height value.
# NOTE(review): the displayed frame also carries a lower-case `weight_predictor`
# column that no visible cell creates — presumably left over from a removed cell;
# confirm the notebook still produces the intended columns after Restart & Run All.
df["Weight_Predictor"] = df["Height"].apply(predict_weight)

In [13]:
# Preview the first rows again to compare the predicted weights with the actual Weight column.
df.head()


Out[13]:
Gender Height Weight weight_predictor Weight_Predictor
0 Male 73.847017 241.893563 219.161506 219.161506
1 Male 68.781904 162.310473 180.072571 180.072571
2 Male 74.110105 212.740856 221.191835 221.191835
3 Male 71.730978 220.042470 202.831427 202.831427
4 Male 69.881796 206.349801 188.560753 188.560753

In [ ]: