In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Import data
# Embedded CSV with a header row followed by one row per year; columns are
# Year, Employed, GNP (in that order).
raw_data = """
Year,Employed,GNP
1947,60.323,234.289
1948,61.122,259.426
1949,60.171,258.054
1950,61.187,284.599
1951,63.221,328.975
1952,63.639,346.999
1953,64.989,365.385
1954,63.761,363.112
1955,66.019,397.469
1956,67.857,419.18
1957,68.169,442.769
1958,66.513,444.546
1959,68.655,482.704
1960,69.564,502.601
1961,69.331,518.173
1962,70.551,554.894"""

# Strip the surrounding whitespace first so that skipping the header does not
# depend on the literal starting with a blank line; then drop the header row.
rows = [
    [float(value) for value in line.split(',')]
    for line in raw_data.strip().splitlines()[1:]
    if line.strip()
]

# Use the builtin `float` as the dtype: the `np.float` alias was removed from
# NumPy and raises AttributeError on current releases.
data = np.array(rows, dtype=float)
n_obs = data.shape[0]  # number of observations (rows)
In [3]:
# Scatter of Employed (column 1) against GNP (column 2): blue circle markers
# with no connecting line.
plt.plot(data[:, 2], data[:, 1], color='b', marker='o', linestyle='None')
plt.xlabel("GNP")
plt.ylabel("Employed")
Out[3]:
In [4]:
# Design matrix: a leading column of ones (intercept term) next to the GNP
# column; the response vector is the Employed column.
X = np.column_stack((np.ones(n_obs), data[:, 2]))
Y = data[:, 1]
In [5]:
# Ordinary least squares via the normal equations (X'X) beta = X'Y.
# Solve the linear system directly rather than forming the explicit inverse
# of X'X: it yields the same coefficients with better numerical behaviour.
beta = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
In [8]:
# Show the fitted coefficients: beta[0] pairs with the column of ones in X
# (intercept) and beta[1] with the GNP column (slope).
print(beta)
In [7]:
# Bonus: draw the fitted regression line over the observed points.
# 50 evenly spaced GNP values on [200, 600], which brackets the observed GNP.
x = np.linspace(200, 600, num=50)
# Fitted line: intercept beta[0] plus slope beta[1] times GNP, solid blue.
plt.plot(x, beta[0] + beta[1]*x, color='b', linestyle='-', marker='None')
# Observations as blue circles with no connecting line.
plt.plot(data[:, 2], data[:, 1], color='b', marker='o', linestyle='None')
plt.xlabel("GNP")
plt.ylabel("Employed")
Out[7]: