seaborn(通常以 sns 为别名导入)可以画出很好看的图。试用 sns.pairplot() 画出 “iris” 数据集中各属性两两配对的关系图。
In [55]:
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Apply seaborn's "ticks" style; color_codes=True remaps matplotlib's
# one-letter colour shorthands to the seaborn palette.
sns.set(style="ticks", color_codes=True)
# Load seaborn's example "iris" dataset.
iris = sns.load_dataset("iris")
# Grid of pairwise plots over the dataset's columns, coloured by species.
g = sns.pairplot(iris, hue="species")
# Call pyplot directly: current seaborn releases do not expose it as `sns.plt`,
# so the old `sns.plt.show()` spelling raises AttributeError.
plt.show()
In [56]:
# Show the iris DataFrame as this cell's output.
iris
Out[56]:
In [70]:
# Build a two-class subset of iris: all 'virginica' rows followed by all
# 'setosa' rows, re-indexed from 0.
# NOTE(review): the earlier note on this cell named 'versicolor', but the code
# has always selected 'virginica' -- confirm which pair is intended.
s = iris['species']
# Boolean masks instead of first/last index-label slices: the selection no
# longer assumes each species occupies one contiguous run of rows, and an
# absent species yields an empty frame rather than an IndexError.
df1 = iris.loc[s == 'virginica', :]
df2 = iris.loc[s == 'setosa', :]
df = pd.concat([df1, df2], ignore_index=True)
In [71]:
# 10-fold cross-validation of logistic regression on the two-species subset.
# cross_val_score is imported from sklearn.model_selection: the legacy
# sklearn.cross_validation module is gone from current scikit-learn releases,
# and this cell already relies on sklearn.model_selection.
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection
from sklearn.metrics import classification_report
# Features: every column up to and including 'petal_width'; labels: 'species'.
X = df.loc[:, :'petal_width']
y = df.loc[:, 'species']
model = LogisticRegression()  # estimator scored by cross_val_score below
# Last expression of the cell: the array of per-fold scores is displayed.
cross_val_score(model, X, y, cv=10)
Out[71]:
In [72]:
# Hold out half of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)
# Fit a fresh logistic-regression model on the training half (fit returns the
# estimator itself), then predict labels for the held-out half.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
# Text report comparing the predictions with the true held-out labels.
print(classification_report(y_test, y_pred))
为何正确率是百分之百?——因为 setosa 与其他两个品种在特征空间中线性可分,逻辑回归这样的线性分类器可以把这两类完全分开。
In [ ]: