In [1]:
# Classification: load the iris dataset and inspect its class labels.
import numpy as np
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()
# Only the last expression of a cell is displayed, so bare `iris.data` /
# `iris.target` lines would be evaluated and silently discarded. Print a
# compact shape summary instead of dumping the full arrays.
print(iris.data.shape, iris.target.shape)
# Distinct class labels present in the target vector (shown as the cell output).
np.unique(iris.target)
Out[1]:
In [2]:
# Load the handwritten-digits dataset and list the distinct labels it contains.
digits = datasets.load_digits()
np.unique(digits["target"])
Out[2]:
In [3]:
from sklearn import svm

# Train a support-vector classifier on every digit sample except the last one.
train_features, train_labels = digits.data[:-1], digits.target[:-1]
clf = svm.SVC(C=100.0, gamma=0.001)
clf.fit(train_features, train_labels)
Out[3]:
In [11]:
# Predict the label of the final digit sample; the [-1:] slice keeps the
# 2-D (1, n_features) shape that predict() expects.
clf.predict(digits.data[-1:])
Out[11]:
In [12]:
# Linear regression on a tiny toy data set.
from sklearn import linear_model

# Use a dedicated name instead of rebinding `clf`: `clf` holds the fitted SVC
# from the digits example, and overwriting it makes the SVC prediction cell
# fail whenever cells are re-run out of order.
reg = linear_model.LinearRegression()
reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# Fitted coefficients, one per input feature (shown as the cell output).
reg.coef_
Out[12]:
In [13]:
# k-nearest-neighbours classification on the iris data.
from sklearn import neighbors

# fit() returns the estimator itself, so construction and training can be chained.
knn = neighbors.KNeighborsClassifier().fit(iris.data, iris.target)
# A single query sample with four feature values; predict() takes a 2-D
# input, hence the wrapping list.
query_point = [0.1, 0.2, 0.3, 0.4]
knn.predict([query_point])
Out[13]:
In [16]:
# K-means clustering of the iris samples into three clusters.
from sklearn import cluster, datasets

iris = datasets.load_iris()
# K-means starts from random centroids, so without a fixed seed the cluster
# ids (and occasionally the clustering itself) change between runs.
# random_state makes the cell reproducible; n_init is set explicitly because
# its default differs across scikit-learn versions.
k_means = cluster.KMeans(n_clusters=3, n_init=10, random_state=0)
k_means.fit(iris.data)
Out[16]:
In [17]:
# Compare the clustering result with the true labels, looking at every 10th sample.
# Cluster ids are arbitrary, so compare the groupings rather than the numeric values.
every_tenth = slice(None, None, 10)
print(k_means.labels_[every_tenth])
print(iris.target[every_tenth])
In [18]:
# Principal component analysis: fit a 2-component PCA on the iris features.
from sklearn import decomposition

pca = decomposition.PCA(n_components=2)
pca.fit(iris["data"])
Out[18]:
In [20]:
import pylab as pl
# Project the iris samples onto the two fitted principal components.
X = pca.transform(iris.data)

# Draw on an explicit figure/axes pair and label it so the plot can be read
# on its own; each point is coloured by its true class label.
fig, ax = pl.subplots()
ax.scatter(X[:, 0], X[:, 1], c=iris.target)
ax.set(
    xlabel="Principal component 1",
    ylabel="Principal component 2",
    title="Iris data projected onto two principal components",
)
pl.show()