In [1]:
import pandas as pd
from sklearn import datasets, model_selection, svm, metrics
In [2]:
# Load the Iris dataset through scikit-learn's `datasets` module. The returned
# object is inspected and unpacked (data, feature_names, target) in later cells.
# API reference: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html
# Plot example:  https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
iris = datasets.load_iris()
In [3]:
# Inspect the loaded object: its Python type first, then the keys it exposes.
print(type(iris), iris.keys(), sep="\n")
In [4]:
# Wrap the feature matrix in a DataFrame whose columns carry the feature names,
# then preview the first five rows.
iris_data = pd.DataFrame(iris.data, columns=iris.feature_names)
print(iris_data.head(n=5))
In [5]:
# Keep the target values in a Series and preview the first five entries.
iris_label = pd.Series(iris.target)
print(iris_label.head(n=5))
In [6]:
# Number of samples (rows) in the feature table.
print(iris_data.shape[0])
In [7]:
# Hold out a test partition for evaluating the classifier.
# `test_size` is spelled out (0.25 is also scikit-learn's default) and
# `random_state` is pinned so that "Restart Kernel -> Run All" reproduces the
# same split and, therefore, the same scores in the cells that follow.
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
data_train, data_test, label_train, label_test = model_selection.train_test_split(
    iris_data,
    iris_label,
    test_size=0.25,
    random_state=42,
)
In [8]:
# Preview the first five rows of the training features.
print(data_train.head(n=5))
In [9]:
# Preview the first five training labels.
print(label_train.head(n=5))
In [10]:
# Row counts of the training and test partitions
# (train_test_split's default test_size is 0.25).
print(data_train.shape[0], data_test.shape[0])
In [11]:
# Fit a support-vector classifier (default settings) on the training partition;
# `fit` hands back the estimator itself, so construction and training are chained.
clf = svm.SVC().fit(data_train, label_train)

# Predict labels for the held-out rows.
pre = clf.predict(data_test)
In [12]:
# Show the container type of the predictions, followed by the predictions themselves.
print(type(pre), pre, sep="\n")
In [13]:
# Score the predictions against the true held-out labels.
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
ac_score = metrics.accuracy_score(y_true=label_test, y_pred=pre)
In [14]:
# Report the hold-out accuracy stored in `ac_score`.
print(ac_score)
In [15]:
# Cross-validate the estimator on the complete dataset using three folds, then
# print the per-fold scores followed by their average.
scores = model_selection.cross_val_score(estimator=clf, X=iris_data, y=iris_label, cv=3)
print(scores, scores.mean(), sep="\n")