In [1]:
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import warnings
from sklearn import datasets
%matplotlib inline
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/scikit-learn.
warnings.filterwarnings(action='ignore') # do not print warnings
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
In [2]:
# Load the raw table. The file has no header row, so pandas assigns integer
# column labels; column 4 holds the class value of each row.
data = pd.read_csv('iris.data', header=None)

# Distinct class values, in order of first appearance.
iris_types = data[4].unique()

# Replace every class value in column 4 with its integer code (0, 1, 2, ...).
# pd.factorize numbers values in order of first appearance, which is the same
# numbering that enumerate(iris_types) yields.  It stands in for
# DataFrame.set_value, which was deprecated in pandas 0.21 and removed in 1.0
# (and which accepted a single row label, not a boolean mask).  The column
# also ends up with an integer dtype instead of object.
data[4] = pd.factorize(data[4])[0]
Out[2]:
In [3]:
# Features: keep only the first two columns so the decision regions can be
# drawn on a 2-D plane.
x = data.iloc[:, [0, 1]]

# Target: column 4 as a 1-D integer Series.  Estimators expect a 1-D numeric
# target; a one-column frame (possibly of object dtype) is what np.split
# produced before.
y = data.iloc[:, 4].astype(int)

# No train/test split is made here: the model below is fitted on every row.
In [4]:
# Two-stage estimator: standardise the features ('sc'), then classify them
# with logistic regression ('clf').  Both stages use their default settings.
lr = Pipeline(
    steps=[
        ('sc', StandardScaler()),
        ('clf', LogisticRegression()),
    ]
)
In [5]:
# Fit the whole pipeline (scaler, then classifier) on the selected features
# and the class labels.
lr.fit(x, y)
Out[5]:
In [6]:
# Quick check: smallest value in the first feature column.
x.iloc[:,0].min()
Out[6]:
In [7]:
# Plotting preparation: build a dense grid of points covering the observed
# range of both features, to be classified and drawn as decision regions.
N, M = 500, 500  # number of samples along the horizontal / vertical axis

# Observed range of feature column 0.
x1_min = x.iloc[:, 0].min()
x1_max = x.iloc[:, 0].max()
# Observed range of feature column 1.
x2_min = x.iloc[:, 1].min()
x2_max = x.iloc[:, 1].max()

t1 = np.linspace(x1_min, x1_max, num=N)
t2 = np.linspace(x2_min, x2_max, num=M)

# Coordinate matrices of the grid sampling points.
x1, x2 = np.meshgrid(t1, t2)

# One row per grid point, one column per feature: the points to classify.
x_test = np.column_stack((x1.ravel(), x2.ravel()))
In [8]:
# Quick check: (number of grid points, number of features).
x_test.shape
Out[8]:
In [9]:
# Draw the classifier's predicted regions over the sampling grid and overlay
# the training samples.

# Font setup for the Chinese axis labels and title; presumably 'simHei' must be
# installed on the machine for the glyphs to render.  unicode_minus=False makes
# tick labels use the ASCII hyphen for negative numbers.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False

# Light colours for the predicted regions, matching dark colours for samples.
cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])

# Training labels as a flat integer array.  Their range is used to pin the
# colour scale of BOTH layers below, so a class keeps the same colour slot in
# the mesh and in the scatter even if the grid predictions do not contain
# every class (each layer would otherwise be normalised to its own min/max).
labels = np.ravel(y).astype(int)
label_min, label_max = labels.min(), labels.max()

y_hat = lr.predict(x_test)  # predicted class for every grid point
y_hat = y_hat.reshape(x1.shape)  # back to the grid's 2-D shape

plt.figure(facecolor='w')
# Predicted regions.
plt.pcolormesh(x1, x2, y_hat, cmap=cm_light, vmin=label_min, vmax=label_max)
# Training samples.
plt.scatter(x.iloc[:, 0], x.iloc[:, 1], c=labels, edgecolors='k', s=50,
            cmap=cm_dark, vmin=label_min, vmax=label_max)
plt.xlabel(u'花萼长度', fontsize=14)
plt.ylabel(u'花萼宽度', fontsize=14)
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.title(u'鸢尾花Logistic回归分类效果', fontsize=18)
plt.grid(True)
plt.show()
# PS: softmax regression
In [ ]: