In [38]:
# Tell IPython that we want to show matplotlib plots using the inline renderer.
%matplotlib inline
import pandas as pd
from pandas.stats.api import ols
import numpy as np
In [39]:
# Build some fake data on which to do a regression.
x_start = 50
x_stop = 150
# Seed NumPy's global random generator so that the "random" fuzz below, and
# therefore the fitted line and the figure, are identical on every
# Restart & Run All.
random_seed = 42
np.random.seed(random_seed)
# Standard deviation of the Gaussian fuzz added to each observation.
noise_scale = 15
# np.arange(X, Y) returns a numpy array containing integer values in [X,Y).
x_vals = np.arange(x_start, x_stop)
# Build fake observations by taking a linear model (y = 5.6 + 3x)
# and adding an independent random fuzz value to each entry.
base = 5.6 + (3 * x_vals)
observations_0 = base + noise_scale * np.random.randn(len(x_vals))
observations_1 = base + noise_scale * np.random.randn(len(x_vals))
df = pd.DataFrame(
    {
        'x': x_vals,
        'y0': observations_0,
        'y1': observations_1,
    },
    # This isn't strictly necessary, but will make it easier to align
    # our plots later.
    index=x_vals,
)
# Preview only the first rows; the full frame has one row per x value.
df.head()
Out[39]:
In [40]:
# Fit one ordinary-least-squares line through both trials at once.
# pd.concat stacks y0 and y1 end-to-end, so every index label (an x value)
# appears twice in the response series, while the regressor df['x'] holds
# each label once.
# NOTE(review): how ols aligns the duplicated labels against x is not
# visible from this notebook -- confirm the fit really uses both trials.
# NOTE(review): pandas.stats.api appears to have been removed in pandas 0.20,
# so this cell presumably needs an older pandas -- confirm the environment.
regression = ols(
    y=pd.concat([df['y0'], df['y1']]),
    x=df['x'],
)
regression
Out[40]:
In [65]:
import matplotlib.pyplot as plt
from matplotlib import lines

# Scatter plot of the first trial. The Axes object that pandas returns is
# reused below so that every later layer is drawn on the same figure.
axis = df.plot(
    x='x',
    y='y0',
    kind='scatter',
    # Everything after this is optional.
    marker='x',
    linewidth=1.5,  # Line widths are numbers (in points), not strings.
    color='red',
    # Limits leave a small margin around x in [50, 150) and the underlying
    # model y = 5.6 + 3x on that range.
    xlim=(45, 155),
    ylim=(125, 485),
    figsize=(12, 7),
    label='Trial 0',
)
df.plot(
    x='x',
    y='y1',
    kind='scatter',
    ax=axis,  # This tells matplotlib to add this plot on top of the previous one.
    # Everything after this is optional.
    marker='x',
    linewidth=1.5,
    color='blue',
    label='Trial 1',
)
# Overlay the fitted line. `regression` is the name the ols(...) result is
# bound to in the regression cell; no `ols_result` is defined anywhere in the
# notebook, so referring to it fails with NameError on a fresh kernel.
# NOTE(review): y_fitted is presumably a Series of fitted values indexed by
# x -- confirm against the ols result.
regression.y_fitted.plot(
    ax=axis,
    color='purple',
    linewidth=2,
    # See https://docs.python.org/2/library/string.html#format-specification-mini-language
    # for an explanation of the format of the strings inside the curly braces here.
    label='Regression: Y = {m:.4f}X + {b:.5}'.format(
        m=regression.beta.x,
        b=regression.beta.intercept,
    )
)
plt.xlabel('My Awesome X Axis')
plt.ylabel('My Awesome Y Axis')
plt.legend(
    loc='upper left',
    # Show a single marker per scatter series in the legend.
    scatterpoints=1,
)
plt.title('My Awesome Regression Analysis')
Out[65]: