In [1]:
#from IPython.display import Image
#Image(filename="Image/iris_petal_sepal.jpg", width=400, height=432)
你还可以通过Python的csv模块,或者NumPy的loadtxt()函数,或者pandas的read_csv()函数,读取从UCI Iris dataset下载的csv文件。
In [2]:
from sklearn.datasets import load_iris
# Load the iris dataset through scikit-learn's load_iris helper
iris = load_iris()
# Show the type of the object that load_iris returned
type(iris)
Out[2]:
In [3]:
# Show the names of the feature columns and of the target classes.
# print is called as a function so the cell runs on Python 3 as well as Python 2
# (a parenthesised single argument prints identically under both).
print(iris.feature_names)
print(iris.target_names)
在scikit-learn中对数据有如下要求:
In [4]:
# Show the container types used for the features and for the response.
# print is called as a function so the cell runs on Python 3 as well as Python 2.
print(type(iris.data))
print(type(iris.target))
In [5]:
# Show the dimensions of the feature data and of the response data.
# print is called as a function so the cell runs on Python 3 as well as Python 2.
print(iris.data.shape)
print(iris.target.shape)
In [6]:
# Bind the features matrix to "X" and the response vector to "y"
X, y = iris.data, iris.target
In [7]:
from IPython.display import HTML
# Markup for a small inline frame pointing at the raw iris.data file on the UCI archive
iris_frame_markup = '<iframe src=http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data width=300 height=200></iframe>'
# Render the frame as the cell's output
HTML(iris_frame_markup)
Out[7]:
In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
# Columns 0 and 1 of X are plotted against each other, coloured by class label y
X_sepal = X[:, :2]
sepal_length, sepal_width = X_sepal[:, 0], X_sepal[:, 1]
plt.scatter(sepal_length, sepal_width, c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
Out[8]:
In [9]:
# Columns 2 and 3 of X are plotted against each other, coloured by class label y
X_petal = X[:, 2:4]
petal_length, petal_width = X_petal[:, 0], X_petal[:, 1]
plt.scatter(petal_length, petal_width, c=y, cmap=plt.cm.gnuplot)
plt.xlabel('Petal length')
plt.ylabel('Petal width')
Out[9]:
KNN分类的基本步骤:
下面给出KNN的演示图例,
分别是训练数据、K=1时的KNN分类图、K=5时的KNN分类图
In [10]:
from sklearn.neighbors import KNeighborsClassifier
In [11]:
# Instantiate a KNN classifier that looks only at the single nearest neighbor (K=1);
# nothing is fitted yet at this point
knn = KNeighborsClassifier(n_neighbors=1)
In [12]:
# Display the estimator's configuration.
# print is called as a function so the cell runs on Python 3 as well as Python 2.
print(knn)
In [13]:
# Fit the classifier on the features matrix X and the response vector y
knn.fit(X, y)
Out[13]:
In [14]:
# predict() expects a 2-D array-like with one row per observation and one column
# per feature. Passing a flat list was deprecated in scikit-learn 0.17 and raises
# ValueError in later releases, so the single observation is wrapped in an outer list.
knn.predict([[3, 5, 4, 2]])
Out[14]:
In [15]:
# Two new observations, one per row, classified in a single predict() call
X_new = [
    [3, 5, 4, 2],
    [5, 4, 3, 2],
]
knn.predict(X_new)
Out[15]:
In [16]:
# Same workflow with K=5: fit() returns the estimator itself, so instantiate and
# train in one expression, then classify the new observations
knn5 = KNeighborsClassifier(n_neighbors=5).fit(X, y)
knn5.predict(X_new)
Out[16]:
In [17]:
# Bring in the logistic regression estimator
from sklearn.linear_model import LogisticRegression
# Create the model with its default parameters and train it on (X, y);
# fit() returns the estimator, so the fitted model is bound to logreg directly
logreg = LogisticRegression().fit(X, y)
# Classify the new observations with the fitted model
logreg.predict(X_new)
Out[17]: