In [1]:
from time import time
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sknn.mlp import Classifier, Layer

In [2]:
mnist = fetch_mldata('mnist-original')
X_train, X_test, y_train, y_test = train_test_split(
        (mnist.data / 255.0).astype(np.float32),
        mnist.target.astype(np.int32),
        test_size=0.33, random_state=1234)
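
MNIST has 70,000 samples of 784 pixel values each, so a 0.33 test split should leave roughly 46,900 rows for training and 23,100 for testing. A quick sanity check, purely illustrative and not recorded notebook output:

print X_train.shape, X_test.shape    # expect (46900, 784) (23100, 784)
print y_train.shape, y_test.shape    # expect (46900,) (23100,)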

In [3]:
clf = Classifier(
        layers=[Layer("Rectifier", units=300), Layer("Softmax")],
        learning_rate=0.02,
        batch_size=100,
        n_iter=2,
        verbose=1,
        valid_size=0.5
)
clf.fit(X_train, y_train)


Out[3]:
Classifier(batch_size=100, debug=False, dropout_rate=None, f_stable=0.001,
      hidden0=<sknn.nn.Layer `Rectifier`: name=u'hidden0', units=300>,
      layers=[<sknn.nn.Layer `Rectifier`: name=u'hidden0', units=300>, <sknn.nn.Layer `Softmax`: name=u'output', units=10>],
      learning_momentum=0.9, learning_rate=0.02, learning_rule=u'sgd',
      loss_type=u'mse', n_iter=2, n_stable=50,
      output=<sknn.nn.Layer `Softmax`: name=u'output', units=10>,
      random_state=None, regularize=None,
      valid_set=(array([[ 0.,  0., ...,  0.,  0.],
       [ 0.,  0., ...,  0.,  0.],
       ...,
       [ 0.,  0., ...,  0.,  0.],
       [ 0.,  0., ...,  0.,  0.]], dtype=float32), array([[0, 0, ..., 0, 0],
       [0, 0, ..., 0, 0],
       ...,
       [0, 0, ..., 0, 0],
       [1, 0, ..., 0, 0]])),
      valid_size=0.5, verbose=1, weight_decay=None)
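
The metric helpers imported in In [1] are never exercised above. As a minimal, illustrative sketch (not recorded output; y_pred is just a placeholder name), the fitted clf could be scored on the held-out split like this:

y_pred = clf.predict(X_test).ravel()    # ravel() in case predict returns a column vector
print "accuracy : %s" % accuracy_score(y_test, y_pred)
print classification_report(y_test, y_pred)
print confusion_matrix(y_test, y_pred)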

In [4]:
print "the dim of train set : %s, %s" %(X_train.shape[0], (X_train.shape[1]))
print "the dim of validation set : %s, %s" %(clf.valid_set[0].shape[0], clf.valid_set[0].shape[1])


train set shape : 46900, 784
validation set shape : 23450, 784
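
The 23,450-row validation set is consistent with valid_size=0.5 holding out half of the data passed to fit (46900 * 0.5 = 23450). A small check, assuming the split is carved out of the fit data:

assert clf.valid_set[0].shape[0] == X_train.shape[0] // 2    # 23450 == 46900 / 2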

In [8]:
print y_test.shape
y_test_reshape = y_test.reshape((y_test.shape[0], 1))
print y_test_reshape.shape


(23100,)
(23100, 1)

In [10]:
clf1 = Classifier(
        layers=[Layer("Rectifier", units=300), Layer("Softmax")],
        learning_rate=0.02,
        batch_size=100,
        n_iter=2,
        verbose=1,
        valid_set=(X_test, y_test_reshape)
)
clf1.fit(X_train, y_train.reshape((y_train.shape[0], 1)))


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-0cdb0283286c> in <module>()
      7         valid_set=(X_test, y_test_reshape)
      8 )
----> 9 clf1.fit(X_train, y_train.reshape((y_train.shape[0], 1)))

/Users/dikien/anaconda/lib/python2.7/site-packages/scikit_neuralnetwork-0.2-py2.7.egg/sknn/mlp.pyc in fit(self, X, y)
    470         yp = self.label_binarizer.transform(y)
    471         # Now train based on a problem transformed into regression.
--> 472         return super(Classifier, self)._fit(X, yp, test=y)
    473 
    474     def partial_fit(self, X, y, classes=None):

/Users/dikien/anaconda/lib/python2.7/site-packages/scikit_neuralnetwork-0.2-py2.7.egg/sknn/mlp.pyc in _fit(self, *data, **extra)
    334     def _fit(self, *data, **extra):
    335         try:
--> 336             return self._train(*data, **extra)
    337         except RuntimeError as e:
    338             log.error("\n{}{}{}\n\n{}\n".format(

/Users/dikien/anaconda/lib/python2.7/site-packages/scikit_neuralnetwork-0.2-py2.7.egg/sknn/mlp.pyc in _train(self, X, y, test)
    376                       "\n---------------------------------")
    377 
--> 378         self._train_layer(self.trainer, self.mlp, self.ds)
    379         return self
    380 

/Users/dikien/anaconda/lib/python2.7/site-packages/scikit_neuralnetwork-0.2-py2.7.egg/sknn/nn.pyc in _train_layer(self, trainer, layer, dataset)
    476 
    477             layer.monitor.report_epoch()
--> 478             layer.monitor()
    479 
    480             if self.verbose:

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/monitor.pyc in __call__(self)
    252             else:
    253                 actual_ne = 0
--> 254                 for X in myiterator:
    255                     # X is a flat (not nested) tuple
    256                     self.run_prereqs(X, d)

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/utils/iteration.pyc in next(self)
    982             rval = self._next(next_index)
    983         else:
--> 984             rval = self._fallback_next(next_index)
    985 
    986         if not self._return_tuple and len(rval) == 1:

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/utils/iteration.pyc in _fallback_next(self, next_index)
   1000         return tuple(
   1001             fn(data[next_index]) if fn else data[next_index]
-> 1002             for data, fn in safe_izip(self._raw_data, self._convert)
   1003         )
   1004 

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/utils/iteration.pyc in <genexpr>((data, fn))
   1000         return tuple(
   1001             fn(data[next_index]) if fn else data[next_index]
-> 1002             for data, fn in safe_izip(self._raw_data, self._convert)
   1003         )
   1004 

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/utils/iteration.pyc in <lambda>(batch, dspace, sp)
    950                 # of the loop.
    951                 fn = (lambda batch, dspace=dspace, sp=sp:
--> 952                       dspace.np_format_as(batch, sp))
    953 
    954             self._convert[i] = fn

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/space/__init__.pyc in np_format_as(self, batch, space)
    484         return self._format_as(is_numeric=True,
    485                                batch=batch,
--> 486                                space=space)
    487 
    488     def _check_sizes(self, space):

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/space/__init__.pyc in _format_as(self, is_numeric, batch, space)
    542 
    543         # checks if self and space have compatible sizes for formatting.
--> 544         self._check_sizes(space)
    545 
    546         return self._format_as_impl(is_numeric, batch, space)

/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/space/__init__.pyc in _check_sizes(self, space)
    498                              " can't format a batch into " +
    499                              str(space) + "because its total dimension is " +
--> 500                              str(other_dimension))
    501 
    502     def format_as(self, batch, space):

ValueError: VectorSpace(dim=1, dtype=float64) with total dimension 1 can't format a batch into VectorSpace(dim=10, dtype=float64)because its total dimension is 10
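
The traceback comes from the validation labels: y_test_reshape is a single column, while the Softmax output layer expects 10 columns, one per digit class. One plausible workaround, assuming sknn 0.2 hands valid_set to pylearn2 unchanged, is to one-hot encode the validation labels so their width matches the output layer; the names below (lb, y_test_onehot, clf2) are illustrative:

from sklearn.preprocessing import LabelBinarizer

# One-hot encode the validation labels into shape (n_samples, 10),
# matching the 10-unit Softmax output layer.
lb = LabelBinarizer().fit(y_train)
y_test_onehot = lb.transform(y_test)    # shape (23100, 10)

clf2 = Classifier(
        layers=[Layer("Rectifier", units=300), Layer("Softmax")],
        learning_rate=0.02,
        batch_size=100,
        n_iter=2,
        verbose=1,
        valid_set=(X_test, y_test_onehot)
)
clf2.fit(X_train, y_train)    # fit() still takes the plain label vector

Alternatively, passing valid_size as in In [3] lets the library carve out and encode a validation split itself, which avoids the mismatch entirely.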