scikit-learn机器学习


In [1]:
# Import scikit-learn and show the installed version as this cell's output.
import sklearn
sklearn.__version__


Out[1]:
'0.18.2'

In [2]:
from sklearn.datasets import load_iris,load_boston,make_classification,make_circles,make_moons

In [5]:
# Iris dataset (also described on Wikipedia): flower measurements together
# with the species each flower belongs to.
data = load_iris()

# Feature matrix and target vector.
x, y = data['data'], data['target']
# Names of the feature columns and of the target classes.
x_label, y_label = data['feature_names'], data['target_names']

# One blank line first, then the array shapes followed by the label names.
# Single-argument parenthesised prints give the same output as the print
# statement under Python 2.
print('')
print(x.shape)
print(y.shape)
print(x_label)
print(y_label)


(150, 4)
(150,)
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']

In [6]:
# Boston housing dataset -- a regression problem.
# NOTE: load_boston was removed in scikit-learn 1.2; this cell relies on the
# 0.18.x release reported at the top of the notebook.
data = load_boston()

# Feature matrix, regression targets and the feature column names.
x, y = data['data'], data['target']
x_labels = data['feature_names']

print(x.shape)
print(y.shape)
print(x_labels)


(506, 13)
(506,)
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']

In [7]:
# Build a synthetic classification dataset:
# 50 samples, 5 features, 2 classes.
x, y = make_classification(
    n_samples=50,
    n_features=5,
    n_classes=2
)
print(x.shape)
print(y.shape)

# Inspect one sample (row index 1) and the label stored at the same index.
print(x[1])
print(y[1])


(50, 5)
(50,)
[-0.25353449  0.30899303  1.17695658 -0.46101866 -0.05937265]
0

In [8]:
# A non-linear toy dataset: make_circles() called with its default arguments.
import numpy as np
import matplotlib.pyplot as plt

x, y = make_circles()

# Close any figures that are still open, then select figure number 1.
plt.close('all')
plt.figure(1)
# Scatter column 0 against column 1 of x, coloured by the values in y.
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()



In [10]:
# A second non-linear toy dataset: make_moons() called with its default
# arguments (make_moons is the name imported from sklearn.datasets above).
x, y = make_moons()

# Close any figures that are still open, then scatter column 0 against
# column 1 of x, coloured by the values in y.
plt.close('all')
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()



In [ ]: