In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression
In [2]:
# Load the dataset into a DataFrame. The path is relative, so it is resolved
# against the kernel's current working directory.
# Later cells read the "Height" and "Weight" columns of this frame.
df = pd.read_csv("data/heights_weights_genders.csv")
In [11]:
# Preview the first few rows of the loaded frame (rendered as the cell output).
df.head()
Out[11]:
In [4]:
# Ordinary least squares fit of Weight on Height. In the formula syntax the
# response goes on the left of "~" and the predictor on the right (Y ~ X).
lm = smf.ols(
    formula="Weight~Height",
    data=df,
).fit()
In [5]:
# Show the fitted coefficients of the model (rendered as the cell output).
lm.params
Out[5]:
In [6]:
# Pull the coefficients out by label rather than by position: positional
# unpacking silently depends on the order of the params Series and would
# swap or break the values if the formula ever changed.
# The formula interface labels the constant term "Intercept" and each
# predictor by its column name ("Height" here).
intercept = lm.params["Intercept"]
slope = lm.params["Height"]
In [8]:
# Draw on an explicit Axes so the scatter and the fitted line are guaranteed
# to share one figure instead of relying on pyplot's implicit "current axes".
fig, ax = plt.subplots()
df.plot(kind="scatter", x="Height", y="Weight", ax=ax)
# A straight line is fully determined by two points, so evaluate the fit only
# at the smallest and largest observed heights rather than at every
# (unsorted) row of the frame.
height_span = [df["Height"].min(), df["Height"].max()]
ax.plot(height_span, [slope * h + intercept for h in height_span], "-", color="red")
# The trailing semicolon keeps the returned Text object's repr out of the cell output.
ax.set_title("Weight vs. Height with OLS best-fit line");
Out[8]:
In [49]:
def weight_predictor(height):
    """Predict a weight from a height using the fitted regression line.

    Evaluates ``slope * height + intercept``. ``slope`` and ``intercept``
    are module-level names looked up at call time, so the cell that assigns
    them must have been run first.

    Parameters
    ----------
    height
        Value to predict for. Anything that supports ``*`` with ``slope``
        and ``+`` with ``intercept`` works (a number, or an array-like such
        as a pandas Series). Presumably expressed in the same units as the
        "Height" column the model was fitted on -- confirm against the data.

    Returns
    -------
    The value of ``slope * height + intercept``.
    """
    return slope * height + intercept
In [50]:
# Try the predictor on a single example height.
# NOTE(review): 73.847017 looks like a value copied from the dataset -- confirm.
weight_predictor(73.847017)
Out[50]:
In [ ]: