Doing the DoNow with scikit-learn


In [14]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

In [15]:
# Read the Hanford dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("data/hanford.csv")

In [16]:
# Display the whole frame as the cell output.
df


Out[16]:
County Exposure Mortality
0 Umatilla 2.49 147.1
1 Morrow 2.57 130.1
2 Gilliam 3.41 129.9
3 Sherman 1.25 113.5
4 Wasco 1.62 137.5
5 HoodRiver 3.83 162.3
6 Portland 11.64 207.5
7 Columbia 6.41 177.9
8 Clatsop 8.34 210.3

In [17]:
# Create an unfitted linear regression estimator with its default settings.
lm = LinearRegression()

In [18]:
# Check which estimator class was instantiated.
type(lm)


Out[18]:
sklearn.linear_model.base.LinearRegression

In [19]:
# Column 0 holds the target (Mortality), column 1 the single feature (Exposure).
data = np.asarray(df[['Mortality', 'Exposure']])
# Slice with 1: rather than index with 1 so x keeps its column axis (a 2-D
# feature matrix with one column); y is the 1-D target vector.
y, x = data[:, 0], data[:, 1:]

In [20]:
# Inspect the raw array: Mortality in column 0, Exposure in column 1.
data


Out[20]:
array([[ 147.1 ,    2.49],
       [ 130.1 ,    2.57],
       [ 129.9 ,    3.41],
       [ 113.5 ,    1.25],
       [ 137.5 ,    1.62],
       [ 162.3 ,    3.83],
       [ 207.5 ,   11.64],
       [ 177.9 ,    6.41],
       [ 210.3 ,    8.34]])

In [21]:
# Inspect the feature matrix: one Exposure value per row, kept 2-D.
x


Out[21]:
array([[  2.49],
       [  2.57],
       [  3.41],
       [  1.25],
       [  1.62],
       [  3.83],
       [ 11.64],
       [  6.41],
       [  8.34]])

In [22]:
# Fit Mortality (y) as a linear function of Exposure (x).
lm.fit(x,y)


Out[22]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [23]:
# Fitted coefficients, one per feature column in x (here a single slope).
lm.coef_


Out[23]:
array([ 9.23145627])

In [24]:
# R^2 of the fit, computed on the same data the model was fitted on (not held-out data).
lm.score(x,y)


Out[24]:
0.85811472686989476

In [25]:
# x has a single feature column, so the first coefficient is the slope of the fitted line.
slope = lm.coef_[0]

In [26]:
# Constant term of the fitted line (its predicted value where Exposure is 0).
intercept = lm.intercept_

In [27]:
# Scatter the observed points, then overlay the fitted regression line on the same axes.
ax = df.plot(kind='scatter', x='Exposure', y='Mortality')
exposure = df['Exposure']
# Evaluate slope * x + intercept at the observed exposures; left as the last
# expression so the cell output is the list of Line2D objects.
ax.plot(exposure, slope * exposure + intercept, '-')


Out[27]:
[<matplotlib.lines.Line2D at 0x7ffb809c1320>]

In [28]:
# predict() expects a 2-D array-like of shape (n_samples, n_features), so wrap the
# single exposure value 10 as one sample with one feature. A bare scalar is rejected
# by current scikit-learn releases.
lm.predict([[10]])


Out[28]:
array([ 207.03019353])

Polynomial functions


In [29]:
# Test inputs for the polynomial plots: 200 values from -10.0 up to 9.9.
# Each entry is computed as i * 0.1, so binary floating-point rounding is
# visible in some of the printed values.
my_list = [i * 0.1 for i in range(-100, 100)]
print(my_list)


[-10.0, -9.9, -9.8, -9.700000000000001, -9.600000000000001, -9.5, -9.4, -9.3, -9.200000000000001, -9.1, -9.0, -8.9, -8.8, -8.700000000000001, -8.6, -8.5, -8.4, -8.3, -8.200000000000001, -8.1, -8.0, -7.9, -7.800000000000001, -7.7, -7.6000000000000005, -7.5, -7.4, -7.300000000000001, -7.2, -7.1000000000000005, -7.0, -6.9, -6.800000000000001, -6.7, -6.6000000000000005, -6.5, -6.4, -6.300000000000001, -6.2, -6.1000000000000005, -6.0, -5.9, -5.800000000000001, -5.7, -5.6000000000000005, -5.5, -5.4, -5.300000000000001, -5.2, -5.1000000000000005, -5.0, -4.9, -4.800000000000001, -4.7, -4.6000000000000005, -4.5, -4.4, -4.3, -4.2, -4.1000000000000005, -4.0, -3.9000000000000004, -3.8000000000000003, -3.7, -3.6, -3.5, -3.4000000000000004, -3.3000000000000003, -3.2, -3.1, -3.0, -2.9000000000000004, -2.8000000000000003, -2.7, -2.6, -2.5, -2.4000000000000004, -2.3000000000000003, -2.2, -2.1, -2.0, -1.9000000000000001, -1.8, -1.7000000000000002, -1.6, -1.5, -1.4000000000000001, -1.3, -1.2000000000000002, -1.1, -1.0, -0.9, -0.8, -0.7000000000000001, -0.6000000000000001, -0.5, -0.4, -0.30000000000000004, -0.2, -0.1, 0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1.0, 1.1, 1.2000000000000002, 1.3, 1.4000000000000001, 1.5, 1.6, 1.7000000000000002, 1.8, 1.9000000000000001, 2.0, 2.1, 2.2, 2.3000000000000003, 2.4000000000000004, 2.5, 2.6, 2.7, 2.8000000000000003, 2.9000000000000004, 3.0, 3.1, 3.2, 3.3000000000000003, 3.4000000000000004, 3.5, 3.6, 3.7, 3.8000000000000003, 3.9000000000000004, 4.0, 4.1000000000000005, 4.2, 4.3, 4.4, 4.5, 4.6000000000000005, 4.7, 4.800000000000001, 4.9, 5.0, 5.1000000000000005, 5.2, 5.300000000000001, 5.4, 5.5, 5.6000000000000005, 5.7, 5.800000000000001, 5.9, 6.0, 6.1000000000000005, 6.2, 6.300000000000001, 6.4, 6.5, 6.6000000000000005, 6.7, 6.800000000000001, 6.9, 7.0, 7.1000000000000005, 7.2, 7.300000000000001, 7.4, 7.5, 7.6000000000000005, 7.7, 7.800000000000001, 7.9, 8.0, 8.1, 8.200000000000001, 8.3, 8.4, 
8.5, 8.6, 8.700000000000001, 8.8, 8.9, 9.0, 9.1, 9.200000000000001, 9.3, 9.4, 9.5, 9.600000000000001, 9.700000000000001, 9.8, 9.9]

In [30]:
# Overlay x**1 through x**10 on a single square figure for comparison.
plt.figure(figsize=(10,10))
plt.ylim(-10,10)  # fix the visible y-range to [-10, 10]
# Draw both coordinate axes through the origin.
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(my_list, my_list, label="Linear")
# Degrees 2 and 3 take irregular ordinal suffixes; every other degree uses "th".
degree_labels = {2: "2nd degree", 3: "3rd degree"}
for degree in range(2, 11):
    label = degree_labels.get(degree, "{}th degree".format(degree))
    plt.plot(my_list, [value**degree for value in my_list], label=label)
# Pin the legend's upper-left corner just outside the right edge of the axes.
# Kept as the last expression so the Legend object remains the cell output.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[30]:
<matplotlib.legend.Legend at 0x7ffb807c65f8>

In [ ]: