In [1]:
# 分类

import numpy as np
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()
iris.data
iris.target

np.unique(iris.target)


Out[1]:
array([0, 1, 2])

In [2]:
digits = datasets.load_digits()
np.unique(digits.target)


Out[2]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
from sklearn import svm

clf = svm.SVC(gamma=0.001, C=100.)
clf.fit(digits.data[:-1], digits.target[:-1])


Out[3]:
SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
clf.predict(digits.data[-1].reshape(1, -1))


Out[11]:
array([8])

In [12]:
# 线性回归

from sklearn import linear_model

clf = linear_model.LinearRegression()
clf.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
clf.coef_


Out[12]:
array([ 0.5,  0.5])

In [13]:
# knn 分类

from sklearn import neighbors
knn = neighbors.KNeighborsClassifier()
knn.fit(iris.data, iris.target) 
knn.predict([[0.1, 0.2, 0.3, 0.4]])


Out[13]:
array([0])

In [16]:
# K-means 聚类

from sklearn import cluster, datasets
iris = datasets.load_iris()
k_means = cluster.KMeans(n_clusters = 3)
k_means.fit(iris.data)


Out[16]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,
    n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,
    verbose=0)

In [17]:
# 对比聚类结果

print(k_means.labels_[::10])
print(iris.target[::10])


[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2]
[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2]

In [18]:
# 主成分分析

from sklearn import decomposition
pca = decomposition.PCA(n_components=2)
pca.fit(iris.data)


Out[18]:
PCA(copy=True, n_components=2, whiten=False)

In [20]:
import pylab as pl
X = pca.transform(iris.data)
pl.scatter(X[:, 0], X[:, 1], c=iris.target) 
pl.show()