In [2]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [5]:
cd ..
In [6]:
# Read the CSV into a DataFrame; the path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="data/heights_weights_genders.csv")
In [7]:
# Subset of df containing only the rows whose Gender value is 'Male'.
maledf = df[df["Gender"].eq("Male")]
In [8]:
# Subset of df containing only the rows whose Gender value is 'Female'.
femaledf = df[df["Gender"].eq("Female")]
In [9]:
# Scatter plot of Weight against Height for the male subset.
maledf.plot.scatter(x="Height", y="Weight")
Out[9]:
In [12]:
# Ordinary least squares fit of Weight on Height, using the male rows only.
lm = smf.ols("Weight~Height", data=maledf).fit()
In [13]:
# Show the fitted coefficients of the model currently bound to `lm`.
lm.params
Out[13]:
In [15]:
# Select the coefficients by label instead of unpacking by position, so the two
# values cannot be silently swapped if the order of terms in `lm.params` changes.
# "Intercept" is the label the formula interface gives the constant term;
# "Height" is the predictor named in the formula.
intercept = lm.params["Intercept"]
slope = lm.params["Height"]
In [16]:
# Scatter every row of df, then draw the fitted line in red on the same axes.
# Drawing through the Axes returned by the scatter call avoids relying on
# pyplot's implicit "current axes".
# NOTE(review): the points cover all rows of df while slope/intercept come from
# the most recent single-gender fit — confirm this mix is intended.
df.plot.scatter(x="Height", y="Weight").plot(
    df["Height"], intercept + slope * df["Height"], "-", color="red"
)
Out[16]:
In [17]:
def getmaleweight(height, slope=slope, intercept=intercept):
    """Predict weight from height using the male regression line.

    The coefficients are captured as default arguments when this cell is
    executed. The function therefore keeps using the male fit even after the
    notebook-level ``slope`` and ``intercept`` are reassigned by a later fit,
    instead of silently picking up whatever values those globals hold at call
    time.

    Parameters
    ----------
    height : number or array-like
        Height value(s), in the same units as the Height column used for the fit.
    slope, intercept : number, optional
        Line coefficients; default to the values of the notebook variables
        ``slope`` and ``intercept`` at the moment this function is defined.

    Returns
    -------
    The value of ``slope * height + intercept``.
    """
    return slope * height + intercept
In [18]:
# Predicted weight for a height of 75.
getmaleweight(height=75)
Out[18]:
In [20]:
# Ordinary least squares fit of Weight on Height, using the female rows only.
# NOTE: this rebinds `lm`, so the previously fitted model is no longer reachable by that name.
lm = smf.ols("Weight~Height", data=femaledf).fit()
In [21]:
# Show the fitted coefficients of the model currently bound to `lm`.
lm.params
Out[21]:
In [22]:
# Select the coefficients by label instead of unpacking by position, so the two
# values cannot be silently swapped if the order of terms in `lm.params` changes.
# "Intercept" is the label the formula interface gives the constant term;
# "Height" is the predictor named in the formula.
# NOTE: this overwrites the notebook-level `intercept` and `slope`.
intercept = lm.params["Intercept"]
slope = lm.params["Height"]
In [23]:
# Scatter every row of df, then draw the fitted line in red on the same axes.
# Drawing through the Axes returned by the scatter call avoids relying on
# pyplot's implicit "current axes".
# NOTE(review): the points cover all rows of df while slope/intercept come from
# the most recent single-gender fit — confirm this mix is intended.
df.plot.scatter(x="Height", y="Weight").plot(
    df["Height"], intercept + slope * df["Height"], "-", color="red"
)
Out[23]:
In [24]:
def getfemaleweight(height, slope=slope, intercept=intercept):
    """Predict weight from height using the female regression line.

    The coefficients are captured as default arguments when this cell is
    executed, so the result does not change if the notebook-level ``slope``
    and ``intercept`` are reassigned afterwards (for example by re-running an
    earlier fitting cell).

    Parameters
    ----------
    height : number or array-like
        Height value(s), in the same units as the Height column used for the fit.
    slope, intercept : number, optional
        Line coefficients; default to the values of the notebook variables
        ``slope`` and ``intercept`` at the moment this function is defined.

    Returns
    -------
    The value of ``slope * height + intercept``.
    """
    return slope * height + intercept
In [25]:
# Predicted weight for a height of 69.
getfemaleweight(height=69)
Out[25]:
In [ ]: