In [4]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
In [5]:
# Read the Hanford CSV into `df`; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="data/hanford.csv")
In [6]:
# Show the loaded frame as this cell's output.
df
Out[6]:
In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) of the frame's columns.
df.describe()
Out[4]:
In [5]:
# One histogram per column, to eyeball each variable's distribution.
df.hist()
Out[5]:
In [7]:
# Pairwise correlation matrix of the columns (pandas defaults to Pearson).
df.corr()
Out[7]:
In [8]:
# Scatter plot of Mortality (y axis) against Exposure (x axis).
df.plot(x='Exposure', y='Mortality', kind='scatter')
Out[8]:
In [19]:
# Create scikit-learn's least-squares linear regression estimator (not yet fitted here).
lm = LinearRegression()
In [20]:
# Pull both columns into one 2-D array: column 0 is Mortality, column 1 is Exposure.
data = np.asarray(df[['Mortality', 'Exposure']])
# y: Mortality as a 1-D target vector.
# x: Exposure, sliced (not indexed) so it stays 2-D with a single feature column.
y, x = data[:, 0], data[:, 1:]
In [21]:
# Fit the regression of Mortality (y) on Exposure (x).
lm.fit(X=x, y=y)
Out[21]:
In [22]:
# R^2 (coefficient of determination) of the fit, evaluated on the same data it was fitted to.
lm.score(x,y)
Out[22]:
In [25]:
# Slope of the fitted line: x has a single feature column (Exposure), so take coefficient 0.
m = lm.coef_[0]
m
Out[25]:
In [26]:
# Intercept of the fitted line.
b = lm.intercept_
b
Out[26]:
In [21]:
# Scatter of the raw data, with the fitted line b + m * Exposure drawn over it
# on the same (current) axes.
df.plot(x='Exposure', y='Mortality', kind='scatter')
plt.plot(df['Exposure'], b + m * df['Exposure'], '-')
Out[21]:
In [24]:
# Predict Mortality for an Exposure value of 10.
# predict() expects a 2-D array shaped (n_samples, n_features); current scikit-learn
# rejects a bare scalar with a ValueError, so pass one row holding one feature.
lm.predict([[10]])
Out[24]:
In [1]:
import statsmodels.formula.api as smf
In [27]:
# Fit the same Mortality-on-Exposure regression with the statsmodels formula API.
# NOTE: this rebinds `lm`, which earlier held the scikit-learn estimator, to the
# statsmodels fitted-results object.
lm = (
    smf.ols(formula='Mortality~Exposure', data=df)
    .fit()
)
In [28]:
# Fitted coefficients of the statsmodels regression, labelled by term.
lm.params
Out[28]:
In [29]:
# Look the coefficients up by label rather than unpacking positionally, so the
# assignment cannot silently swap or mis-assign values if the formula's terms change.
# The formula API labels the constant term 'Intercept'; the slope carries the
# regressor's column name.
intercept = lm.params['Intercept']
slope = lm.params['Exposure']
In [33]:
# Scatter of the raw data, with the statsmodels line intercept + slope * Exposure
# drawn over it on the same (current) axes.
df.plot(x='Exposure', y='Mortality', kind='scatter')
plt.plot(df['Exposure'], intercept + slope * df['Exposure'], '-')
Out[33]:
In [34]:
# Same scatter-plus-fitted-line figure, drawn in matplotlib's xkcd sketch style.
# xkcd() is used as a context manager: a bare plt.xkcd() call rewrites the global
# rcParams for the rest of the kernel session, which would restyle every figure
# drawn afterwards (including earlier plotting cells when they are re-run).
with plt.xkcd():
    df.plot(kind='scatter', x='Exposure', y='Mortality')
    plt.plot(df['Exposure'], slope * df['Exposure'] + intercept, '-')
    # Draw the figure while the sketch-style settings are still in effect.
    plt.show()
Out[34]:
In [30]:
# Full statsmodels regression report (coefficient table, fit statistics, diagnostics).
lm.summary()
Out[30]:
In [31]:
# Mean squared error of the model: explained sum of squares / model degrees of freedom.
# NOTE: this is not the residual MSE; statsmodels exposes that as `mse_resid`.
lm.mse_model
Out[31]:
In [32]:
# Two-tailed p-values for the t-statistics of each fitted coefficient.
lm.pvalues
Out[32]:
In [ ]: