In [4]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# visualization libraries
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# plot the visuals in ipython
%matplotlib inline

In [10]:
# Load the samples from disk and relabel the columns with the names the
# rest of the notebook refers to.
# NOTE(review): the relabelling assumes the JSON yields exactly two columns,
# in (hours, score) order -- confirm against the data file.
df = pd.read_json("data/linreg.json")
df.columns = ["hours", "score"]

# Predictor and response as separate Series.
x, y = df["hours"], df["score"]

In [11]:
# Fit ordinary-least-squares models of `y` on polynomial features of `x`
# (degree 1, 2 and 3) and evaluate each fitted curve on a common grid.
X = x

# Design matrices: a leading constant (intercept) column followed by the
# powers of X. Each matrix is built in a single step so that X2 / X3 are
# never bound to an intermediate, constant-less version.
X1 = sm.add_constant(X)
X2 = sm.add_constant(np.column_stack((X, X**2)))
X3 = sm.add_constant(np.column_stack((X, X**2, X**3)))

est1 = sm.OLS(y, X1).fit()
est2 = sm.OLS(y, X2).fit()
est3 = sm.OLS(y, X3).fit()

# Integer grid 0..23 on which the fitted curves are evaluated for plotting.
xx = np.arange(24)

# Legend labels carrying each model's R-squared.
r1 = "r2 = %.4f" % est1.rsquared
r2 = "r2 = %.4f" % est2.rsquared
r3 = "r2 = %.4f" % est3.rsquared

# `params` can come back as a pandas Series labelled by regressor name
# ('const', ...). Indexing such a Series with a bare integer relies on the
# deprecated positional fallback, so convert to a plain ndarray first; this
# also works unchanged if `params` is already an ndarray.
p = np.asarray(est1.params)
y_lin = p[0] + p[1]*xx

p = np.asarray(est2.params)
y_p2 = p[0] + p[1]*xx + p[2]*xx**2

p = np.asarray(est3.params)
y_p3 = p[0] + p[1]*xx + p[2]*xx**2 + p[3]*xx**3

In [15]:
def plot_score_fit(title, out_path, curve=None, curve_label=None,
                   legend_anchor=(0.61, 0.15)):
    """Scatter the samples, optionally overlay a fitted curve, and save a PNG.

    The figure and axes are created together and the axes are handed to
    pandas explicitly. Calling ``df.plot`` without ``ax=`` makes pandas open
    its own figure, which leaves a separately created, resized figure empty
    and saves the plot at the default size instead.

    Parameters
    ----------
    title : str
        Axes title.
    out_path : str
        File the figure is written to (saved at dpi=200).
    curve : array-like, optional
        Fitted values evaluated on the notebook-level grid ``xx``. When
        omitted, only the scatter is drawn.
    curve_label : str, optional
        Legend entry for ``curve``.
    legend_anchor : tuple of float
        ``bbox_to_anchor`` passed to the legend.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes that were drawn and saved.
    """
    fig, ax = plt.subplots(figsize=(16, 12))
    df.plot(kind='scatter', x='hours', y='score', label='Samples', ax=ax)
    ax.set_ylim([200, 900])
    if curve is not None:
        ax.plot(xx, curve, '-', color="green", label=curve_label)
    ax.legend(loc='center left', bbox_to_anchor=legend_anchor)
    ax.set_title(title)
    # Save through the figure object rather than pyplot's "current figure"
    # so the file always contains the axes drawn above.
    fig.savefig(out_path, dpi=200)
    return fig, ax


# Samples only (the legend sits at a slightly different anchor here).
fig, ax = plot_score_fit("Score vs Effort", "img/linreg-no.png",
                         legend_anchor=(0.67, 0.12))

# Samples with each fitted polynomial overlaid.
fig, ax = plot_score_fit("Score vs Effort, Linear", "img/linreg-p1.png",
                         curve=y_lin, curve_label=r1)
fig, ax = plot_score_fit("Score vs Effort, 2nd-order Polynomial",
                         "img/linreg-p2.png", curve=y_p2, curve_label=r2)
fig, ax = plot_score_fit("Score vs Effort, 3rd-order Polynomial",
                         "img/linreg-p3.png", curve=y_p3, curve_label=r3)


<matplotlib.figure.Figure at 0x7f23f72f7a90>
<matplotlib.figure.Figure at 0x7f23f7295dd8>
<matplotlib.figure.Figure at 0x7f23f5f0c160>
<matplotlib.figure.Figure at 0x7f23f5fa9b00>