seaborn中的sns库可以画出很好看的图 试用sns.pairplot()画出基于 “iris”数据集各两对属性的图


In [55]:
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

sns.set(style="ticks", color_codes=True)
iris = sns.load_dataset("iris")
g = sns.pairplot(iris, hue="species")
sns.plt.show()



In [56]:
iris


Out[56]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
10 5.4 3.7 1.5 0.2 setosa
11 4.8 3.4 1.6 0.2 setosa
12 4.8 3.0 1.4 0.1 setosa
13 4.3 3.0 1.1 0.1 setosa
14 5.8 4.0 1.2 0.2 setosa
15 5.7 4.4 1.5 0.4 setosa
16 5.4 3.9 1.3 0.4 setosa
17 5.1 3.5 1.4 0.3 setosa
18 5.7 3.8 1.7 0.3 setosa
19 5.1 3.8 1.5 0.3 setosa
20 5.4 3.4 1.7 0.2 setosa
21 5.1 3.7 1.5 0.4 setosa
22 4.6 3.6 1.0 0.2 setosa
23 5.1 3.3 1.7 0.5 setosa
24 4.8 3.4 1.9 0.2 setosa
25 5.0 3.0 1.6 0.2 setosa
26 5.0 3.4 1.6 0.4 setosa
27 5.2 3.5 1.5 0.2 setosa
28 5.2 3.4 1.4 0.2 setosa
29 4.7 3.2 1.6 0.2 setosa
... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 virginica
121 5.6 2.8 4.9 2.0 virginica
122 7.7 2.8 6.7 2.0 virginica
123 6.3 2.7 4.9 1.8 virginica
124 6.7 3.3 5.7 2.1 virginica
125 7.2 3.2 6.0 1.8 virginica
126 6.2 2.8 4.8 1.8 virginica
127 6.1 3.0 4.9 1.8 virginica
128 6.4 2.8 5.6 2.1 virginica
129 7.2 3.0 5.8 1.6 virginica
130 7.4 2.8 6.1 1.9 virginica
131 7.9 3.8 6.4 2.0 virginica
132 6.4 2.8 5.6 2.2 virginica
133 6.3 2.8 5.1 1.5 virginica
134 6.1 2.6 5.6 1.4 virginica
135 7.7 3.0 6.1 2.3 virginica
136 6.3 3.4 5.6 2.4 virginica
137 6.4 3.1 5.5 1.8 virginica
138 6.0 3.0 4.8 1.8 virginica
139 6.9 3.1 5.4 2.1 virginica
140 6.7 3.1 5.6 2.4 virginica
141 6.9 3.1 5.1 2.3 virginica
142 5.8 2.7 5.1 1.9 virginica
143 6.8 3.2 5.9 2.3 virginica
144 6.7 3.3 5.7 2.5 virginica
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns


In [70]:
"""get the data corrspoding to 'versicolor' and 'setosa' from iris data"""
s = iris['species']
start = s[s == 'virginica'].index[0]
end = s[s == 'virginica'].index[-1]
df1 = iris.loc[start:end, :]

start = s[s == 'setosa'].index[0]
end = s[s == 'setosa'].index[-1]
df2 = iris.loc[start:end, :]

df = pd.concat([df1,df2],ignore_index=True)

In [71]:
#10-fold cross validation
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection
from sklearn.metrics import classification_report

#get the test_data and label_data from df
X = df.loc[:,:'petal_width']
y = df.loc[:,'species']

model = LogisticRegression()    #choose the logistictRegression
cross_val_score(model,X,y,cv=10)


Out[71]:
array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [72]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y,test_size=0.5, random_state=42)
model = LogisticRegression()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        20
  virginica       1.00      1.00      1.00        30

avg / total       1.00      1.00      1.00        50

为何正确率是百分之一百


In [ ]: