In [26]:
    
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression
%matplotlib inline
    
In [3]:
    
# Load the Hanford dataset from the repository's data directory (path is relative to this notebook).
df = pd.read_csv("../data/hanford.csv")
    
County: Name of county
Exposure: Index of exposure
Mortality: Cancer mortality per 100000 man-years
In [5]:
    
# Display the loaded DataFrame to eyeball the raw records.
df
    
    Out[5]:
In [6]:
    
# Show pandas' summary statistics for the loaded data.
df.describe()
    
    Out[6]:
In [14]:
    
# Pairwise correlations between the numeric columns only. Non-numeric columns
# (the data dictionary lists a County name column) are excluded explicitly:
# pandas >= 2.0 raises on them in DataFrame.corr() instead of silently
# dropping them, and on older releases this selection gives the same result.
correlation = df.select_dtypes(include="number").corr()
print(correlation)
# Scatter plot of the two variables whose relationship is modelled below.
df.plot(kind='scatter', x='Exposure', y='Mortality')
    
    
    Out[14]:
    
In [21]:
    
# Fit an ordinary least squares model of Mortality as a linear function of Exposure.
lm = smf.ols(
    formula="Mortality~Exposure",
    data=df,
).fit()
# Unpack the fitted parameters: the intercept comes first, then the Exposure
# coefficient, which this notebook stores under the name `height` (the slope).
intercept, height = lm.params
# Evaluate the fitted regression line at a single exposure value.
def simplest_predictor(exposure, height, intercept):
    """Return ``height * exposure + intercept`` with every input coerced to float.

    Args:
        exposure: Exposure value; anything ``float()`` accepts, including
            numeric strings.
        height: Slope of the regression line.
        intercept: Intercept of the regression line.

    Returns:
        The predicted value as a float.

    Raises:
        ValueError: If an argument is a string that ``float()`` cannot parse.
    """
    # Coerce in the order height, intercept, exposure.
    slope, offset, dose = (float(value) for value in (height, intercept, exposure))
    return slope * dose + offset
    
In [22]:
    
# Input the data
exposure = input("Please enter the exposure: ")
print("The mortality rate for your exposure lies at", simplest_predictor(exposure,height,intercept), ".")
    
    
In [24]:
    
# Scatter of the observed data.
df.plot.scatter(x="Exposure", y="Mortality")
# Overlay the best-fit line computed from the fitted slope (`height`) and intercept.
plt.plot(
    df["Exposure"],
    height * df["Exposure"] + intercept,
    "-",
    color="darkgrey",
)
    
    Out[24]:
    
In [ ]:
    
def predictiong_mortality_rate(exposure):
    """Predict the mortality rate for a given exposure from the fitted line.

    Uses the notebook-level ``intercept`` and ``height`` (the slope) that an
    earlier cell unpacks from ``lm.params``.

    Args:
        exposure: Exposure value; anything ``float()`` accepts, including
            numeric strings.

    Returns:
        ``intercept + height * exposure`` as computed from the fitted model.

    Raises:
        ValueError: If ``exposure`` is a string that ``float()`` cannot parse.
    """
    # Previously this returned ``intercept + float()``; ``float()`` is 0.0, so
    # the exposure was ignored and the intercept was always returned.
    return intercept + height * float(exposure)