In [10]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
In [25]:
# Load the Hanford dataset from a path relative to the notebook's working
# directory. Later cells read the 'Exposure' and 'Mortality' columns from it.
df = pd.read_csv("data/hanford.csv")
In [26]:
# Show the loaded frame as the cell output (rich DataFrame display).
# NOTE(review): this renders every row; switch to df.head() if the file is large.
df
Out[26]:
In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()
Out[4]:
In [5]:
# One histogram per numeric column, to eyeball each distribution.
df.hist()
Out[5]:
In [7]:
# Pairwise correlation matrix between the columns of df (Pearson by default).
# NOTE(review): the CSV schema is not visible here; if it contains any
# non-numeric column, pandas >= 2.0 raises unless numeric_only=True is
# passed -- confirm against the data file.
df.corr()
Out[7]:
In [8]:
# Scatter of Mortality against Exposure to check for a linear relationship
# before fitting the regression.
df.plot.scatter(x='Exposure', y='Mortality')
Out[8]:
In [9]:
# Ordinary least-squares estimator with default settings; fitted in a later cell.
lm = LinearRegression()
In [11]:
# Pull the two modelling columns into one NumPy array:
# column 0 is Mortality (the target), column 1 is Exposure (the feature).
data = np.asarray(df[['Mortality', 'Exposure']])

# x keeps a 2-D shape (n_samples, 1) because of the 1: slice, which is the
# layout scikit-learn expects for features; y is the 1-D target vector.
x, y = data[:, 1:], data[:, 0]
In [12]:
# Fit the linear model: Mortality (y) regressed on Exposure (x).
lm.fit(x,y)
Out[12]:
In [14]:
# Coefficient of determination (R^2) of the fit, evaluated on the same data
# the model was trained on (in-sample, not a held-out estimate).
lm.score(x,y)
Out[14]:
In [19]:
# Slope of the fitted line: x has a single feature column (Exposure), so the
# model has exactly one coefficient.
m = lm.coef_[0]
In [20]:
# Intercept of the fitted line (the model's prediction at Exposure = 0).
b = lm.intercept_
In [21]:
# Overlay the fitted regression line on the raw data. The scatter call
# returns the Axes it drew on, so the line is added to that same Axes.
ax = df.plot.scatter(x='Exposure', y='Mortality')
exposure = df['Exposure']
ax.plot(exposure, m * exposure + b, '-')
Out[21]:
In [24]:
# Predict Mortality for an Exposure index of 10.
# scikit-learn estimators take X as a 2-D array of shape
# (n_samples, n_features). Current releases reject a bare scalar with
# "Expected 2D array", so the value is wrapped as one sample with one feature.
# The result is a length-1 array holding the prediction.
lm.predict([[10]])
Out[24]:
In [ ]: