In [2]:
import numpy as np
import sklearn.preprocessing as pp

In [3]:
# Sample vector 0..4; the StandardScaler cells that follow operate on it.
a = np.arange(5)
# A single-argument print(...) produces the same text on Python 2 and
# Python 3, whereas the statement form `print "a:", a` only parses on Python 2.
print("a: %s" % (a,))


a: [0 1 2 3 4]

In [4]:
# Standardise `a` with scikit-learn's StandardScaler.
ss = pp.StandardScaler()
# NOTE(review): `a` is a 1-D integer array. The recorded run emits a
# UserWarning about non-float input, and the StandardScaler API documents
# X as 2-D (n_samples, n_features) -- consider
# `a.reshape(-1, 1).astype(float)` when porting this cell.
print(ss.fit(a))            # fit() returns the estimator itself, hence the repr in the output
print(ss.fit_transform(a))  # fit again and return the scaled values in one call

# NOTE(review): the scaler was fitted on a single vector but is applied here
# to a 3x2 matrix; the recorded output is consistent with one shared
# mean/std being applied to both columns. Confirm that this is intended.
print(ss.transform([[1, 2], [3, 4], [5, 6]]))


StandardScaler(copy=True, with_mean=True, with_std=True)
[-1.41421356 -0.70710678  0.          0.70710678  1.41421356]
[[-0.70710678  0.        ]
 [ 0.70710678  1.41421356]
 [ 2.12132034  2.82842712]]
C:\Users\aabao\AppData\Local\Enthought\Canopy\User\lib\site-packages\sklearn\utils\validation.py:332: UserWarning: StandardScaler assumes floating point values as input, got int32
  "got %s" % (estimator, X.dtype))

In [5]:
# Apply the already-fitted scaler to `a`; only transform() is called here,
# so the statistics come from whichever cell last fitted `ss`.
scaled = ss.transform(a)
print(scaled)  # call form works on both Python 2 and Python 3


[-1.41421356 -0.70710678  0.          0.70710678  1.41421356]

In [6]:
# Sanity check: the mean of the standardised values (recorded result: 0.0).
np.mean(scaled)


Out[6]:
0.0

In [7]:
# Sanity check: the standard deviation of the standardised values
# (recorded result is 1 up to floating-point rounding).
np.std(scaled)


Out[7]:
0.99999999999999989

In [20]:
# 5x2 integer matrix: column 0 is 0,2,4,6,8 and column 1 is 1,3,5,7,9.
a = np.arange(10).reshape(5, 2)
print(a)
ss = pp.StandardScaler()
# ss.mean_ cannot be printed at this point: the attribute does not exist
# until fit() has been called on the scaler.
print("00: %s" % (ss,))
# A second, independent scaler fitted in one expression, used only to
# inspect the learned per-column means.
m = pp.StandardScaler().fit(a)
print(m)
print("test2: %s" % (m.mean_,))
print(ss.fit(a))          # fit == "train": learn the statistics from the data
scaled = ss.transform(a)  # fit works the statistics out; transform applies them
print(scaled)


 [[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
00: StandardScaler(copy=True, with_mean=True, with_std=True)
StandardScaler(copy=True, with_mean=True, with_std=True)
test2: [ 4.  5.]
StandardScaler(copy=True, with_mean=True, with_std=True)
[[-1.41421356 -1.41421356]
 [-0.70710678 -0.70710678]
 [ 0.          0.        ]
 [ 0.70710678  0.70710678]
 [ 1.41421356  1.41421356]]

In [46]:
b = np.array([[2, 4], [5, 6], [7, 2], [5, 6], [6, 7]])
print(b)
# NOTE(review): `ss` is not fitted in this cell, so the result depends on
# which cell last fitted it. The recorded output matches a scaler with
# mean 2 and std ~1.414 applied to every element, not the per-column
# statistics of a 5x2 fit -- re-run top to bottom to confirm the intended state.
ss.transform(b)


[[2 4]
 [5 6]
 [7 2]
 [5 6]
 [6 7]]
Out[46]:
array([[ 0.        ,  1.41421356],
       [ 2.12132034,  2.82842712],
       [ 3.53553391,  0.        ],
       [ 2.12132034,  2.82842712],
       [ 2.82842712,  3.53553391]])

In [61]:
# Fresh scaler fitted on a small 3x2 matrix (3 samples, 2 features).
points = [[1, 2], [3, 4], [5, 6]]  # single definition instead of a repeated literal
ss = pp.StandardScaler()
print(ss.fit(points))            # prints the estimator returned by fit()
print(ss.fit_transform(points))  # fit and scale in one call


StandardScaler(copy=True, with_mean=True, with_std=True)
[[-1.22474487 -1.22474487]
 [ 0.          0.        ]
 [ 1.22474487  1.22474487]]

In [1]:
from sklearn import datasets

# Load the two bundled datasets used in the cells that follow.
# The two loads are independent of each other.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [2]:
# Feature matrix of the digits dataset (numpy abbreviates the printout).
# The call form of print works on both Python 2 and Python 3.
print(digits.data)


[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]

In [3]:
# Label array of the digits dataset: one integer class per sample.
digits.target


Out[3]:
array([0, 1, 2, ..., 8, 9, 8])

In [5]:
# First sample shown as its 8x8 grid of pixel intensities.
digits.images[0]


Out[5]:
array([[  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.],
       [  0.,   0.,  13.,  15.,  10.,  15.,   5.,   0.],
       [  0.,   3.,  15.,   2.,   0.,  11.,   8.,   0.],
       [  0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.],
       [  0.,   5.,   8.,   0.,   0.,   9.,   8.,   0.],
       [  0.,   4.,  11.,   0.,   1.,  12.,   7.,   0.],
       [  0.,   2.,  14.,   5.,  10.,  12.,   0.,   0.],
       [  0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.]])

In [6]:
from sklearn import svm

# Support vector classifier with hand-picked hyper-parameters; it is left
# unfitted here and trained in the next cell.
clf = svm.SVC(C=100.0, gamma=0.001)

In [7]:
# Train on every sample except the last one, which is held out so the
# following cell can predict it.
clf.fit(digits.data[:-1], digits.target[:-1])


Out[7]:
SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
  gamma=0.001, kernel='rbf', max_iter=-1, probability=False,
  random_state=None, shrinking=True, tol=0.001, verbose=False)

In [8]:
# Predict the held-out last sample. predict() takes a 2-D
# (n_samples, n_features) array, so slice with [-1:] to keep a one-row
# matrix instead of passing a bare 1-D vector.
clf.predict(digits.data[-1:])


Out[8]:
array([8])

In [11]:
# Load the iris data first, then build and train a default SVC on all of it.
iris = datasets.load_iris()
X = iris.data
y = iris.target
clf = svm.SVC()
clf.fit(X, y)


Out[11]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [12]:
import pickle

# Round-trip the fitted classifier through an in-memory pickle to show that
# the restored copy still predicts. Only unpickle data you produced or trust.
s = pickle.dumps(clf)
clf2 = pickle.loads(s)
# predict() takes a 2-D (n_samples, n_features) array; X[:1] keeps the first
# sample as a one-row matrix instead of a bare 1-D vector.
clf2.predict(X[:1])


Out[12]:
array([0])

In [13]:
# True label of the first sample, for comparison with the prediction above.
y[0]


Out[13]:
0

In [15]:
a = np.array([[1, 1], [2, 3]])
# The bare name `shape` is never imported in this notebook and only resolves
# in a pylab-style namespace; the array's own attribute gives the same tuple.
a.shape


Out[15]:
(2L, 2L)

In [16]:
a = np.array((1, 2, 3))
# The bare name `shape` is never imported in this notebook and only resolves
# in a pylab-style namespace; the array's own attribute gives the same tuple.
a.shape


Out[16]:
(3L,)

In [1]:
# Day-resolution datetime64 range. The stop value is exclusive, so the result
# lists the days of February 2005 and stops before 2005-03-01.
np.arange('2005-02', '2005-03', dtype='datetime64[D]')


Out[1]:
array(['2005-02-01', '2005-02-02', '2005-02-03', '2005-02-04',
       '2005-02-05', '2005-02-06', '2005-02-07', '2005-02-08',
       '2005-02-09', '2005-02-10', '2005-02-11', '2005-02-12',
       '2005-02-13', '2005-02-14', '2005-02-15', '2005-02-16',
       '2005-02-17', '2005-02-18', '2005-02-19', '2005-02-20',
       '2005-02-21', '2005-02-22', '2005-02-23', '2005-02-24',
       '2005-02-25', '2005-02-26', '2005-02-27', '2005-02-28'], dtype='datetime64[D]')

In [2]:
import math
# math.sqrt returns a float even for a perfect square (recorded result: 4.0).
math.sqrt(16)


Out[2]:
4.0

In [3]:
# The stop value is exclusive. On this Python 2 kernel range() returns a real
# list, which is why the Out[] shows the elements directly.
range(5, 10)


Out[3]:
[5, 6, 7, 8, 9]

In [4]:
# Explicit list() around range(); on this kernel the displayed result is the
# same as for the bare range() call.
list(range(5, 10))


Out[4]:
[5, 6, 7, 8, 9]

In [7]:
# Same call with an explicit start of 0; the stop value 10 is excluded.
range(0, 10)


Out[7]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [8]:
# Explicit list() around range(0, 10); the displayed result is the same as
# for the bare call on this kernel.
list(range(0, 10))


Out[8]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [14]:
myName = "David"
# str.center(width) pads to `width` characters; a width shorter than the
# string returns it unchanged, which is why 'David' comes back as is.
myName.center(2)


Out[14]:
'David'

In [ ]: