Assignment 1

  1. Use the data from heights_weights_genders.csv to create a simple predictor that takes in a person's height and guesses their weight, based on a model fitted to all the data regardless of gender.
  2. Find the weights (i.e., the fitted model coefficients) and use those in your function; don't generate a model each time.

In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression

In [3]:
# Load the full dataset; the preview below shows Gender, Height and Weight columns.
df = pd.read_csv("data/heights_weights_genders.csv")
# Show the first rows as a quick sanity check of the load.
df.head()


Out[3]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [8]:
# Fit an ordinary least squares regression of Weight on Height using every row
# (Gender is deliberately left out of the formula).
lm = smf.ols(formula="Weight~Height",data=df).fit()
# Display the fitted coefficients: the Intercept and the Height slope.
lm.params


Out[8]:
Intercept   -350.737192
Height         7.717288
dtype: float64

In [9]:
def weight_predictor(your_file, *, slope=7.717288, intercept=-350.737192):
    """Predict a weight for every row of a CSV file from its ``Height`` column.

    The prediction uses a fixed linear model, ``weight = slope * height +
    intercept``, instead of refitting a regression on each call. The default
    coefficients are the ``Height`` and ``Intercept`` parameters obtained by
    fitting ``Weight ~ Height`` once on heights_weights_genders.csv.

    Args:
        your_file: Path (or file-like object) of a CSV that has a ``Height``
            column. A ``Weight`` column is not required.
        slope: Predicted weight change per unit of height.
        intercept: Predicted weight at a height of zero.

    Returns:
        A pandas Series (named ``Height``, indexed like the file's rows)
        holding the predicted weight for each row.

    Raises:
        KeyError: If the file has no ``Height`` column.
    """
    heights = pd.read_csv(your_file)["Height"]
    # Vectorised over the whole column; no model fitting happens here.
    return heights * slope + intercept

In [10]:
# Run the file-based predictor over the full dataset file.
weight_predictor("data/heights_weights_genders.csv")


Out[10]:
0       219.161480
1       180.072546
2       221.191809
3       202.831401
4       188.560728
5       168.273675
6       180.097066
7       176.727962
8       166.467320
9       138.974825
10      198.698052
11      202.135508
12      149.083200
13      183.940189
14      183.636607
15      171.303513
16      208.135788
17      142.971081
18      186.695182
19      173.544499
20      173.382786
21      185.146939
22      159.754687
23      229.648939
24      173.214133
25      175.149910
26      182.447367
27      211.087858
28      169.571928
29      177.869348
           ...    
9970    155.661475
9971    148.110190
9972    117.345114
9973    117.987944
9974    153.926703
9975    160.547955
9976    123.690045
9977    146.988021
9978     92.048040
9979    128.166902
9980    115.944648
9981    115.720862
9982    188.458208
9983    157.297454
9984    104.945712
9985    174.355279
9986    138.173798
9987    155.595928
9988    108.740302
9989    119.670727
9990    136.837169
9991    132.648046
9992    128.335291
9993    112.534933
9994    105.341003
9995    159.936199
9996    166.839332
9997    142.150475
9998    182.019919
9999    127.304371
Name: Height, dtype: float64

In [14]:
def weight_predict(your_height, *, slope=7.717288, intercept=-350.737192):
    """Guess a person's weight from their height with a fixed linear model.

    The default coefficients are the ``Height`` and ``Intercept`` parameters
    obtained by fitting ``Weight ~ Height`` once on all rows of
    heights_weights_genders.csv. Using them directly avoids re-reading the
    CSV and refitting the regression on every call.

    Args:
        your_height: Height to predict for, in the same units as the
            dataset's ``Height`` column. Anything that supports ``*`` and
            ``+`` with floats works (a number, a NumPy array, a Series).
        slope: Predicted weight change per unit of height.
        intercept: Predicted weight at a height of zero.

    Returns:
        The predicted weight, ``your_height * slope + intercept``.
    """
    return your_height * slope + intercept

In [15]:
# Spot check using the height of the first row shown in the dataset preview.
weight_predict(73.847017)


Out[15]:
219.16147979083121

In [ ]: