In [23]:
%matplotlib inline
# Using Stochastic Gradient Descent (SGD) for Regression
# SGD is widely used for regression because it is simple and fast,
# and because the resulting model is easy to explain.

In [7]:
from sklearn import datasets

In [8]:
X, y = datasets.make_regression(int(1e6))

In [9]:
"{:,}".format(int(1e6))


Out[9]:
'1,000,000'

In [10]:
"{:,}".format(X.nbytes)


Out[10]:
'800,000,000'

In [12]:
X.nbytes / 1e6 # the number of megabytes


Out[12]:
800.0

In [13]:
# number of bytes per data point:
X.nbytes / (X.shape[0]*X.shape[1])


Out[13]:
8
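A quick sanity check on these numbers (a sketch, not part of the original session): make_regression defaults to 100 features, and each value is a 64-bit float, so the array size works out as follows.

# each value is a float64 (8 bytes); n_features defaults to 100 in make_regression
n_samples, n_features, bytes_per_value = int(1e6), 100, 8
n_samples * n_features * bytes_per_value  # 800,000,000 bytes, i.e. 800 MB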

In [14]:
# Fit a SGDRegressor model

In [25]:
from sklearn import linear_model
import numpy as np
from matplotlib import pyplot as plt

In [26]:
sgd = linear_model.SGDRegressor()

In [27]:
train = np.random.choice([True, False], size=len(y), p=[.75, .25])

In [28]:
sgd.fit(X[train], y[train])


Out[28]:
SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False)

In [29]:
# The loss defaults to squared_loss, which makes this ordinary least
# squares linear regression fit by stochastic gradient descent.
# shuffle=True shuffles the training data after each epoch, which
# helps the stochastic updates converge.
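As a minimal sketch (not part of the original recipe), the squared loss means the SGD solution should land close to an ordinary least squares fit; comparing coefficients against a closed-form fit on a subsample of the training data illustrates this.

# sketch: fit LinearRegression on a subsample and compare coefficients
# with the SGD fit; the two should be close
lr = linear_model.LinearRegression()
lr.fit(X[train][:10000], y[train][:10000])
np.abs(lr.coef_ - sgd.coef_).max()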

In [30]:
test_preds = sgd.predict(X[~train])

f, ax = plt.subplots(figsize=(7,5))
f.tight_layout()
ax.hist(test_preds - y[~train], label='Test Predictions', color='b', alpha=.5)

ax.set_title('Residuals')
ax.legend(loc='best')


Out[30]:
<matplotlib.legend.Legend at 0x107aa8e90>
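As a follow-up sketch (assuming the sgd model, test_preds, and train mask defined above), the residuals can also be summarized numerically rather than only plotted.

# sketch: numeric summary of the held-out residuals
residuals = test_preds - y[~train]
residuals.mean(), residuals.std()  # should be centred near zero
# mean absolute error on the held-out rows:
np.abs(residuals).mean()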
