notebook.community

Edit and run



In [1]:

    
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import warnings
from sklearn import datasets
%matplotlib inline
warnings.filterwarnings(action='ignore') #不输出警告
np.set_printoptions(suppress=True)



In [2]:

    
data = pd.read_csv('iris.data',header=None)
iris_types = data[4].unique()
for i,type in enumerate(iris_types):
    data.set_value(data[4]==type,4,i)  
"""
set_value(index, col, value, takeable=False)
index : row label
col : column label
value : scalar value
takeable : interpret the index/col as indexers, default False
returns:
frame : DataFrame
If label pair is contained, will be reference to calling DataFrame, otherwise a new object
"""









    Out[2]:





'\nset_value(index, col, value, takeable=False)\nindex : row label\ncol : column label\nvalue : scalar value\ntakeable : interpret the index/col as indexers, default False\nreturns:\nframe : DataFrame\nIf label pair is contained, will be reference to calling DataFrame, otherwise a new object\n'



In [3]:

    
x,y = np.split(data,(4,),axis=1)
x = x.iloc[:,[0,1]]  #只取前两列，方便画图
#train_test_split(x, y, train_size=0.8)



In [4]:

    
lr = Pipeline([
        ('sc',StandardScaler()),
        ('clf',LogisticRegression())
    ])



In [5]:

    
lr.fit(x, y)









    Out[5]:





Pipeline(memory=None,
     steps=[('sc', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])



In [6]:

    
x.iloc[:,0].min()









    Out[6]:





4.2999999999999998



In [7]:

    
# 画图
N, M = 500, 500     # 横纵各采样多少个值
x1_min, x1_max = x.iloc[:,0].min(), x.iloc[:,0].max()   # 第0列的范围
x2_min, x2_max = x.iloc[:,1].min(), x.iloc[:,1].max()   # 第1列的范围
t1 = np.linspace(x1_min, x1_max, N)
t2 = np.linspace(x2_min, x2_max, M)
x1, x2 = np.meshgrid(t1, t2)                    # 生成网格采样点
x_test = np.stack((x1.flat, x2.flat), axis=1)   # 测试点



In [8]:

    
x_test.shape









    Out[8]:





(250000, 2)



In [9]:

    
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False
cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
y_hat = lr.predict(x_test)                  # 预测值
y_hat = y_hat.reshape(x1.shape)                 # 使之与输入的形状相同
plt.figure(facecolor='w')
plt.pcolormesh(x1, x2, y_hat, cmap=cm_light)     # 预测值的显示
plt.scatter(x.iloc[:, 0], x.iloc[:, 1], c=y, edgecolors='k', s=50, cmap=cm_dark)    # 样本的显示
plt.xlabel(u'花萼长度', fontsize=14)
plt.ylabel(u'花萼宽度', fontsize=14)
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.title(u'鸢尾花Logistic回归分类效果', fontsize=18)
plt.grid(True)
plt.show()
#ps:softmax 回归



In [ ]: