scikit-learn机器学习



In [1]:

    
# Import scikit-learn and evaluate its version string; as the last expression
# in the cell, the notebook echoes it as the cell's output.
import sklearn
sklearn.__version__









    Out[1]:





'0.18.2'



In [2]:

    
from sklearn.datasets import load_iris,load_boston,make_classification,make_circles,make_moons



In [5]:

    
# Iris dataset (described on Wikipedia): flower measurements labelled by species.
data = load_iris()

x = data['data']                 # feature matrix
y = data['target']               # class index for each sample
y_label = data['target_names']   # names of the target classes
x_label = data['feature_names']  # names of the feature columns

# Single-argument print(...) calls produce the same output under the Python 2
# print statement and the Python 3 print function, so the cell runs on both.
print('')  # blank separator line (replaces the bare Python 2 `print`)
print(x.shape)
print(y.shape)
print(x_label)
print(y_label)









    



(150, 4)
(150,)
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
['setosa' 'versicolor' 'virginica']



In [6]:

    
# Boston housing dataset: a regression problem.
# NOTE(review): load_boston was removed from newer scikit-learn releases
# (1.2+); this notebook ran against 0.18.2 -- confirm before upgrading.
data = load_boston()
x = data['data']                  # feature matrix
y = data['target']                # regression target for each sample
x_labels = data['feature_names']  # names of the feature columns

# Single-argument print(...) calls behave identically on Python 2 and Python 3.
print(x.shape)
print(y.shape)
print(x_labels)









    



(506, 13)
(506,)
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']



In [7]:

    
# Generate a synthetic classification dataset: 50 samples, 5 features, 2 classes.
x, y = make_classification(n_samples=50, n_features=5, n_classes=2)

# Single-argument print(...) calls behave identically on Python 2 and Python 3.
print(x.shape)
print(y.shape)

# Inspect the sample at index 1: its feature row and its class label.
print(x[1, :])
print(y[1])









    



(50, 5)
(50,)
[-0.25353449  0.30899303  1.17695658 -0.46101866 -0.05937265]
0



In [8]:

    
# Some non-linear datasets: start with make_circles and plot it.
import numpy as np
import matplotlib.pyplot as plt

x, y = make_circles()

# Close any figures left over from earlier runs, then draw on figure 1.
plt.close('all')
plt.figure(1)
# Scatter the first feature column against the second, coloured by label.
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()



In [10]:

    
# Generate the make_moons dataset and plot it.
# (Fixes the misspelled call `make_moonsnsoons`, which is not defined anywhere;
# the function imported from sklearn.datasets is `make_moons`.)
x, y = make_moons()

# Close any figures left over from earlier cells before drawing a new one.
plt.close('all')
# Scatter the first feature column against the second, coloured by label.
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()



In [ ]: