In [22]:
from sklearn import preprocessing, datasets as d
import numpy as np
import scipy.sparse

In [8]:
boston = d.load_boston()
X, y = boston.data, boston.target

In [9]:
X[:, :3].mean(axis=0)


Out[9]:
array([  3.59376071,  11.36363636,  11.13677866])

In [10]:
X[:, :3].std(axis=0)


Out[10]:
array([  8.58828355,  23.29939569,   6.85357058])
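
preprocessing.scale (applied in the next cell) is the standard z-score transform, computed column by column. A minimal by-hand sketch, assuming the X loaded above, that should agree with it up to floating-point error:

# standardize the first three columns: subtract each column mean, divide by each column std
X_manual = (X[:, :3] - X[:, :3].mean(axis=0)) / X[:, :3].std(axis=0)
np.allclose(X_manual, preprocessing.scale(X[:, :3]))  # expected: True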

In [11]:
X_2 = preprocessing.scale(X[:, :3])

In [12]:
X_2.mean(axis=0)


Out[12]:
array([  6.34099712e-17,  -6.34319123e-16,  -2.68291099e-15])

In [13]:
X_2.std(axis=0)


Out[13]:
array([ 1.,  1.,  1.])
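
The scaled columns now have (numerically) zero mean and unit variance. When the same standardization has to be reapplied to data seen later (a test split, for instance), preprocessing.StandardScaler stores the fitted column statistics so they can be reused; a short sketch on the same X:

scaler = preprocessing.StandardScaler()
scaler.fit(X[:, :3])                   # learns per-column mean and standard deviation
X_scaled = scaler.transform(X[:, :3])  # same result as preprocessing.scale above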

In [14]:
normalized_X = preprocessing.normalize(X[:, :3])

In [16]:
normalized_X.mean(axis=0)


Out[16]:
array([ 0.14564668,  0.25834782,  0.73560531])

In [17]:
normalized_X.std(axis=0)


Out[17]:
array([ 0.22560452,  0.43091533,  0.36652811])
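
Unlike scale, normalize works row-wise: by default each sample is rescaled to unit L2 (Euclidean) length, which is why the column means and standard deviations above are not 0 and 1. A quick check on the normalized_X just computed:

np.linalg.norm(normalized_X, axis=1)  # expected: every row norm is 1.0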

In [19]:
test_x = [[1.0, 1.0, 0.0], [3.0, 3.0, 0.0], [1.0, -1.0, 0.0]]
normalized_test = preprocessing.normalize(test_x)

In [20]:
normalized_test


Out[20]:
array([[ 0.70710678,  0.70710678,  0.        ],
       [ 0.70710678,  0.70710678,  0.        ],
       [ 0.70710678, -0.70710678,  0.        ]])
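
The toy rows make the row-wise scaling explicit: 0.70710678 is 1/sqrt(2), i.e. each row has been divided by its own Euclidean length. A by-hand sketch for the first test row:

row = np.array([1.0, 1.0, 0.0])
row / np.sqrt((row ** 2).sum())  # expected: array([ 0.70710678,  0.70710678,  0.        ])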

In [21]:
## scaling sparse matrices

In [23]:
matrix = scipy.sparse.eye(1000)

In [25]:
preprocessing.scale(matrix, with_mean=False)


Out[25]:
<1000x1000 sparse matrix of type '<type 'numpy.float64'>'
	with 1000 stored elements in Compressed Sparse Row format>
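
Centering a sparse matrix would turn its (mostly zero) entries into nonzeros and destroy the sparse representation, which is why with_mean=False is passed above; leaving with_mean at its default is expected to fail on sparse input. A minimal sketch of that failure mode:

try:
    preprocessing.scale(matrix)  # with_mean defaults to True: centering sparse input is refused
except ValueError:
    pass  # expected: sparse matrices cannot be centered without densifying them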
