import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression

Assignment 1

Use the data from heights_weights_genders.csv to create a simple predictor that takes in a person's height and guesses their weight, based on a model fitted to all the data regardless of gender. Fit the model once, find its coefficients (the intercept and the slope for height), and use those in your function (i.e., don't fit a new model each time the function is called).

# Load the dataset (Gender, Height, Weight columns) from the CSV under ./data.
df = pd.read_csv("data/heights_weights_genders.csv")

Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

# Fit an ordinary least squares regression of Weight on Height using every row
# of df (no split by Gender).
lm = smf.ols(formula="Weight~Height",data=df).fit() # the formula regresses Y on X (Y~X)

# Display the fitted coefficients: the intercept and the slope for Height.
lm.params # get the parameters from the model fit

Intercept   -350.737192
Height         7.717288
dtype: float64

# Look the coefficients up by label instead of unpacking lm.params positionally,
# so the two values cannot be silently swapped if the term order ever differs.
intercept = lm.params["Intercept"]
slope = lm.params["Height"]

# Draw the best-fit line as a solid red line, evaluating slope*Height+intercept
# at every observed height in df.
plt.plot(df["Height"],slope*df["Height"]+intercept,"-",color="red") # best-fit line built from the fitted coefficients

[<matplotlib.lines.Line2D at 0x10769cbe0>]

def weight_predictor(height):
    """Predict weight from height using the already-fitted regression line.

    Reads the module-level ``slope`` and ``intercept`` obtained from the model
    fit, so no model is fitted when this function is called.

    Args:
        height: Height value(s), in the same units as the ``Height`` column
            the model was fitted on.

    Returns:
        The predicted weight, ``slope * height + intercept``.
    """
    scaled_height = slope * height
    return scaled_height + intercept

