In [29]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import KFold
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.cross_validation import cross_val_score
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
In [3]:
# Load the Iris dataset through scikit-learn's bundled loader; later cells
# read its `.data` and `.target` attributes.
iris_data = load_iris()
In [20]:
# Pull the feature array and the label array out of the loaded dataset in one
# step, then print the labels as a quick sanity check.
data_input, data_output = iris_data.data, iris_data.target
print(data_output)
In [16]:
# Demonstration splitter: 5 shuffled folds over 10 sample indices (legacy
# `sklearn.cross_validation` signature, whose first argument is the number of
# samples). The fixed random_state keeps the folds identical on every
# Restart & Run All instead of reshuffling each time.
kf = KFold(10, n_folds=5, shuffle=True, random_state=0)
In [17]:
# Show every train/test index pair produced by the `kf` splitter.
print("Train Set Test Set ")
for train_set, test_set in kf:
    # One line per fold: training indices followed by test indices.
    print(train_set, test_set)
In [18]:
# The three classifiers compared in the cells below.
# The random forest is seeded so that its scores are repeatable between runs;
# logistic regression and the SVM are left at their library defaults.
rf_class = RandomForestClassifier(n_estimators=10, random_state=0)
log_class = LogisticRegression()
svm_class = svm.SVC()
In [23]:
# 10-fold cross-validated accuracy for each classifier.
# Every model is scored exactly once, so the per-fold scores and the reported
# mean always come from the same run (an unseeded random forest returns
# different scores on every call, so scoring twice can print a mean that does
# not match the folds shown above it).
cv_reports = [
    # (header line, model name used in the summary line, estimator)
    ("Random Forests: ", "Random Forests", rf_class),
    ("\n\nSVM:", "SVM", svm_class),
    ("\n\nLog:", "Logistic Regression", log_class),
]
for header, label, estimator in cv_reports:
    fold_scores = cross_val_score(estimator, data_input, data_output, scoring='accuracy', cv=10)
    accuracy = fold_scores.mean() * 100  # mean fold accuracy as a percentage
    print(header)
    print(fold_scores)
    # Each summary line names the model it actually belongs to.
    print("Accuracy of {} is: ".format(label), accuracy)
In [30]:
# Hold out 33% of the samples for testing. The split is seeded so that the
# train/test partition, and every score computed from it in the following
# cells, is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    data_input, data_output, test_size=0.33, random_state=0
)
In [31]:
# Train the random forest on the training split and display its score on the
# held-out split (`fit` returns the estimator itself, so the calls chain).
rf_class.fit(X_train, y_train).score(X_test, y_test)
Out[31]:
In [34]:
# Train logistic regression on the training split and display its score on
# the held-out split (`fit` returns the estimator itself, so the calls chain).
log_class.fit(X_train, y_train).score(X_test, y_test)
Out[34]:
In [33]:
# Train the SVM on the training split and display its score on the held-out
# split (`fit` returns the estimator itself, so the calls chain).
svm_class.fit(X_train, y_train).score(X_test, y_test)
Out[33]:
In [37]:
from sklearn import cross_validation

# Two explicit, seeded splitters over the full dataset:
#   kf2 - 10 shuffled folds
#   cv  - 10 random splits, each holding out 30% of the samples
sample_count = len(iris_data.data)
kf2 = KFold(sample_count, n_folds=10, shuffle=True, random_state=10)
cv = cross_validation.ShuffleSplit(sample_count, n_iter=10, test_size=0.3, random_state=0)

# Original author's note (not verified here): this shows that the internal
# shuffling is not that good for this example.
# Mean accuracy of each classifier is reported under kf2 first, then under cv.
for banner, splitter in (("With kf2 ----", kf2), ("\n With cv", cv)):
    print(banner)
    rf_mean = cross_val_score(rf_class, data_input, data_output, cv=splitter, scoring='accuracy').mean()
    print("Random Forest: {}".format(rf_mean))
    svm_mean = cross_val_score(svm_class, data_input, data_output, cv=splitter, scoring='accuracy').mean()
    print("SVM: ", svm_mean)
    log_mean = cross_val_score(log_class, data_input, data_output, cv=splitter, scoring='accuracy').mean()
    print("Logistic: ", log_mean)
In [ ]: