In [4]:
from sklearn import linear_model
clf = linear_model.LinearRegression()
clf.fit([[0,0],[1,1],[2,2]], [0,1,2])
print clf.coef_
print clf.intercept_
print clf.predict([3,3])
print clf.score([[0,0],[1,1],[2,2]], [0,1,2])
print clf.score([[4,4],[5,5],[6,6]], [4,5,6])
In [ ]:
#!/usr/bin/python
import numpy
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from studentRegression import studentReg
from class_vis import prettyPicture, output_image
from ages_net_worths import ageNetWorthData
ages_train, ages_test, net_worths_train, net_worths_test = ageNetWorthData()
reg = studentReg(ages_train, net_worths_train)
plt.clf()
plt.scatter(ages_train, net_worths_train, color="b", label="train data")
plt.scatter(ages_test, net_worths_test, color="r", label="test data")
plt.plot(ages_test, reg.predict(ages_test), color="black")
plt.legend(loc=2)
plt.xlabel("ages")
plt.ylabel("net worths")
plt.savefig("test.png")
output_image("test.png", "png", open("test.png", "rb").read())
In [ ]:
def studentReg(ages_train, net_worths_train):
### import the sklearn regression module, create, and train your regression
### name your regression reg
### your code goes here!
from sklearn import linear_model
clf = linear_model.LinearRegression()
reg = clf.fit(ages_train, net_worths_train)
return reg
In [ ]:
import numpy
import matplotlib.pyplot as plt
from ages_net_worths import ageNetWorthData
ages_train, ages_test, net_worths_train, net_worths_test = ageNetWorthData()
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(ages_train, net_worths_train)
### get Katie's net worth (she's 27)
### sklearn predictions are returned in an array,
### so you'll want to do something like net_worth = predict([27])[0]
### (not exact syntax, the point is that [0] at the end)
km_net_worth = reg.predict([27])[0] ### fill in the line of code to get the right value
### get the slope
### again, you'll get a 2-D array, so stick the [0][0] at the end
slope = reg.coef_[0][0] ### fill in the line of code to get the right value
### get the intercept
### here you get a 1-D array, so stick [0] on the end to access
### the info we want
intercept = reg.intercept_[0] ### fill in the line of code to get the right value
### get the score on test data
test_score = reg.score(ages_test, net_worths_test) ### fill in the line of code to get the right value
### get the score on the training data
training_score = reg.score(ages_train, net_worths_train) ### fill in the line of code to get the right value
def submitFit():
return {"networth":km_net_worth,
"slope":slope,
"intercept":intercept,
"stats on test":test_score,
"stats on training": training_score}
In [ ]:
import numpy
import random
def ageNetWorthData():
random.seed(42)
numpy.random.seed(42)
ages = []
for ii in range(100):
ages.append( random.randint(20,65) )
net_worths = [ii * 6.25 + numpy.random.normal(scale=40.) for ii in ages]
### need massage list into a 2d numpy array to get it to work in LinearRegression
ages = numpy.reshape( numpy.array(ages), (len(ages), 1))
net_worths = numpy.reshape( numpy.array(net_worths), (len(net_worths), 1))
from sklearn.cross_validation import train_test_split
ages_train, ages_test, net_worths_train, net_worths_test = train_test_split(ages, net_worths)
return ages_train, ages_test, net_worths_train, net_worths_test