In [26]:
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression
%matplotlib inline
In [3]:
# Load the Hanford data from CSV (relative path) into a DataFrame.
df = pd.read_csv('../data/hanford.csv')
County: Name of county
Exposure: Index of exposure
Mortality: Cancer mortality per 100000 man-years
In [5]:
# Show the loaded DataFrame as the cell output.
df
Out[5]:
In [6]:
# Summary statistics of the DataFrame, as produced by DataFrame.describe().
df.describe()
Out[6]:
In [14]:
# Pairwise correlations between the numeric columns only. County holds
# county names (text); pandas >= 2.0 no longer drops such columns silently
# in corr() and raises instead, so restrict to numeric columns explicitly.
# select_dtypes is used (rather than numeric_only=True) because it also
# works on older pandas versions.
correlation = df.select_dtypes(include="number").corr()
print(correlation)
# Scatter plot of cancer mortality against the exposure index.
df.plot(kind='scatter', x='Exposure', y='Mortality')
Out[14]:
In [21]:
# Ordinary least-squares fit of Mortality on Exposure.
lm = smf.ols(formula="Mortality~Exposure", data=df).fit()
# Read the two fitted coefficients by label. `height` holds the slope on
# Exposure; the name is kept because other cells refer to it.
intercept = lm.params["Intercept"]
height = lm.params["Exposure"]
# Prediction helper: evaluates the straight line height * exposure + intercept.
def simplest_predictor(exposure, height, intercept):
    """Return the value of the line ``height * exposure + intercept``.

    Parameters
    ----------
    exposure : anything accepted by ``float()``
        The exposure value to predict for (a numeric string is accepted).
    height : anything accepted by ``float()``
        Slope of the line (the parameter name is kept for existing callers).
    intercept : anything accepted by ``float()``
        Intercept of the line.

    Returns
    -------
    float
        ``float(height) * float(exposure) + float(intercept)``.

    Raises
    ------
    ValueError
        If an argument is a string that ``float()`` cannot parse.
    TypeError
        If an argument is of a type ``float()`` does not accept.
    """
    # Convert in the same order as before (height, intercept, exposure) so the
    # first failing argument is reported identically.
    slope = float(height)
    offset = float(intercept)
    x_value = float(exposure)
    return slope * x_value + offset
In [22]:
# Ask for an exposure value. input() returns a string; simplest_predictor
# converts it with float().
exposure = input("Please enter the exposure: ")
# Build the sentence with format() so the final period follows the number
# directly (comma-separated print arguments inserted a space before it).
print("The mortality rate for your exposure lies at {}.".format(simplest_predictor(exposure, height, intercept)))
In [24]:
# Scatter of the observations, then the fitted line drawn onto the same axes.
df.plot.scatter(x="Exposure", y="Mortality")
# Best-fit line: intercept + slope * Exposure, using the coefficients taken
# from the fitted model (`height` is the slope).
plt.plot(
    df["Exposure"],
    intercept + height * df["Exposure"],
    linestyle="-",
    color="darkgrey",
)
Out[24]:
In [ ]:
def predictiong_mortality_rate(exposure):
    """Predict the mortality rate for a given exposure from the fitted line.

    Uses the notebook-level ``intercept`` and ``height`` (the slope on
    Exposure) taken from the fitted regression model.

    Parameters
    ----------
    exposure : anything accepted by ``float()``
        The exposure value to predict for.

    Returns
    -------
    float
        ``intercept + height * float(exposure)``.

    Raises
    ------
    ValueError
        If ``exposure`` is a string that ``float()`` cannot parse.
    """
    # The earlier draft called float() with no argument, which is always 0.0,
    # so the exposure was ignored and only the intercept was returned.
    return intercept + height * float(exposure)