In [ ]:
# Write out some toy data: split the digits dataset into ten pickled batches
# under data/ so that later cells can read them back from disk one at a time.
import os
import pickle

from sklearn.datasets import load_digits

digits = load_digits()

X, y = digits.data, digits.target

# Make sure the output directory exists so the cell works on a fresh checkout
# (open() in "wb" mode does not create missing parent directories).
os.makedirs("data", exist_ok=True)

for i in range(10):
    # Batch i holds every tenth sample starting at offset i. Features are
    # divided by 16 (presumably to bring pixel intensities into [0, 1]).
    batch = (X[i::10] / 16., y[i::10])
    # The context manager flushes and closes the file deterministically
    # instead of leaving the handle to the garbage collector.
    with open("data/batch_%02d.pickle" % i, "wb") as batch_file:
        pickle.dump(batch, batch_file, pickle.HIGHEST_PROTOCOL)

In [ ]:
from sklearn.linear_model import SGDClassifier

In [ ]:
# Train incrementally on batches 00-08, reading one batch from disk at a time.
sgd = SGDClassifier(random_state=1)

for i in range(9):
    # Close each batch file as soon as it has been read rather than leaking
    # one open handle per iteration.
    with open("data/batch_%02d.pickle" % i, "rb") as batch_file:
        X_batch, y_batch = pickle.load(batch_file)
    # The full label set is passed explicitly via `classes`, so the model
    # does not rely on any single batch containing every class.
    sgd.partial_fit(X_batch, y_batch, classes=range(10))

In [ ]:
# Load batch 09 (not read by the training loop, which stops at batch 08) and
# score the model on it. The file is closed as soon as it has been read.
with open("data/batch_09.pickle", "rb") as test_file:
    X_test, y_test = pickle.load(test_file)

sgd.score(X_test, y_test)

Exercise

Iterate over the dataset ten times and print the error on the hold-out batch (09) for each pass. Try changing the `learning_rate` schedule (and `eta0`) of `SGDClassifier` and see how that affects the results.


In [ ]:
# %load solutions/out_of_core.py