In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
In [2]:
# Load the first exercise dataset: a comma-separated text file from which the
# first two columns are read. unpack=True transposes the result, so data[0]
# and data[1] are the two columns rather than rows.
datafile = 'ex1/ex1data1.txt'
data = np.loadtxt(
    datafile,
    delimiter=',',
    usecols=(0, 1),
    unpack=True
)
In [3]:
# Turn each loaded 1-D column into an (n_samples, 1) column vector, the 2-D
# layout the estimator is fed later on. Passing -1 lets NumPy infer the number
# of rows from the data instead of hard-coding the file's length, so the cell
# keeps working if the dataset grows or shrinks.
X = data[0].reshape(-1, 1)
y = data[1].reshape(-1, 1)
In [4]:
# Hold out the final 20 samples as the test set; every row before them is
# used for training.
X_train, X_test = X[:-20], X[-20:]
In [5]:
# Partition the targets with the same cut as the features: the last 20 rows
# are the test targets, the rest are the training targets.
y_train, y_test = y[:-20], y[-20:]
In [6]:
# Create linear regression object: an unfitted scikit-learn LinearRegression
# estimator (from the sklearn.linear_model module imported at the top),
# constructed with its default settings.
lr = linear_model.LinearRegression()
In [7]:
# Train the model using the training sets (every row of X and y except the
# last 20, which are held out for testing).
# The call is the cell's last expression, so the notebook records its return
# value as this cell's output.
lr.fit(X_train, y_train)
Out[7]:
In [8]:
# Report the fitted coefficient(s) and the model's quality on the held-out rows.
# Each print(...) takes a single pre-formatted string, so the cell runs and
# prints the same text under both Python 2 and Python 3.
print("Coefficient:  %s" % (lr.coef_,))
# The mean of the squared residuals is the mean squared error, not a residual
# *sum* of squares, so the label names the quantity actually computed.
print("Mean squared error: %.2f" % np.mean((lr.predict(X_test) - y_test) ** 2))
# score() on a LinearRegression is the coefficient of determination R^2
# (1.0 would be a perfect prediction).
print('Variance score: %.2f' % lr.score(X_test, y_test))
In [9]:
# Draw every observation as a red scatter point and overlay the model's
# predictions for the same inputs as a thin blue line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, color='red')
ax.plot(X, lr.predict(X), color='blue', linewidth=1)
ax.grid(True)
ax.set_xlabel('Population of City in 10,000s')
ax.set_ylabel('Profit in $10,000s')
plt.show()
In [10]:
# Load the second exercise dataset: a comma-separated text file from which the
# first three columns are read. unpack=True transposes the result, so data[0],
# data[1] and data[2] are the three columns rather than rows.
# Note that this rebinds `datafile` and `data` from the first dataset.
datafile = 'ex1/ex1data2.txt'
data = np.loadtxt(
    datafile,
    delimiter=',',
    usecols=(0, 1, 2),
    unpack=True
)
In [11]:
# The first two loaded columns are the input features; they are kept here as
# a (2, n_samples) array with one feature per row.
X = data[0:2]
# The third column is the target, reshaped to an (n_samples, 1) column vector.
# Passing -1 lets NumPy infer the row count instead of hard-coding the file's
# length.
y = data[2].reshape(-1, 1)
In [12]:
# Split the data into training/testing sets
# Each feature row of X becomes an (n_samples, 1) column; -1 lets NumPy infer
# the number of rows rather than hard-coding the dataset size.
X_1 = X[0].reshape(-1, 1)
X_2 = X[1].reshape(-1, 1)
# Stack the two feature columns side by side into (rows, 2) matrices:
# all but the last 20 rows for training, the last 20 rows for testing.
X_train = np.hstack((X_1[:-20], X_2[:-20]))
X_test = np.hstack((X_1[-20:], X_2[-20:]))
In [13]:
# Partition the targets with the same cut as the features: the last 20 rows
# are the test targets, the rest are the training targets.
y_train, y_test = y[:-20], y[-20:]
In [14]:
# Create linear regression object: a fresh, unfitted scikit-learn
# LinearRegression estimator with default settings. This rebinds `lr`, so the
# model fitted on the first dataset is no longer reachable through that name.
lr = linear_model.LinearRegression()
In [15]:
# Train the model using the training sets: the two-column feature matrix and
# its targets, i.e. every row except the last 20 held out for testing.
# The call is the cell's last expression, so the notebook records its return
# value as this cell's output.
lr.fit(X_train, y_train)
Out[15]:
In [16]:
# Report the fitted coefficients (one per feature) and the model's quality on
# the held-out rows.
# Each print(...) takes a single pre-formatted string, so the cell runs and
# prints the same text under both Python 2 and Python 3.
print("Coefficient:  %s" % (lr.coef_,))
# The mean of the squared residuals is the mean squared error, not a residual
# *sum* of squares, so the label names the quantity actually computed.
print("Mean squared error: %.2f" % np.mean((lr.predict(X_test) - y_test) ** 2))
# score() on a LinearRegression is the coefficient of determination R^2
# (1.0 would be a perfect prediction).
print('Variance score: %.2f' % lr.score(X_test, y_test))
In [ ]: