In [29]:
import numpy as np
import pandas as pd
from sklearn import cross_validation
from sklearn import metrics
from sklearn import svm
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the iris data set via scikit-learn's load_iris(); the cells below read
# its `.data` and `.target` attributes.
iris_data = load_iris()

In [20]:
# Split the loaded data set into the model inputs and the labels to predict,
# then show the labels.
data_input, data_output = iris_data.data, iris_data.target
print(data_output)


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]

In [16]:
# Demo splitter: 5 shuffled folds over the indices 0..9.
# random_state is fixed so the folds printed below are the same on every run.
kf = KFold(n=10, n_folds=5, shuffle=True, random_state=0)

In [17]:
# Show which indices land in the train / test part of every fold of `kf`.
print("Train Set          Test Set        ")
for fold in kf:
    train_set, test_set = fold
    print(train_set, test_set)


Train Set          Test Set        
[0 1 2 3 4 5 7 8] [6 9]
[0 2 4 5 6 7 8 9] [1 3]
[0 1 3 4 5 6 8 9] [2 7]
[1 2 3 4 5 6 7 9] [0 8]
[0 1 2 3 6 7 8 9] [4 5]

In [18]:
# The three classifiers compared throughout the notebook.
# The random forest is the only explicitly randomized model here, so it gets a
# fixed seed; without it every score computed below changes from run to run.
rf_class = RandomForestClassifier(n_estimators=10, random_state=0)
log_class = LogisticRegression()
svm_class = svm.SVC()

In [23]:
# 10-fold cross-validated accuracy for each classifier.
# Each entry: (header line, name used in the summary line, estimator).
cv_models = [
    ("Random Forests: ", "Random Forests", rf_class),
    ("\n\nSVM:", "SVM", svm_class),
    ("\n\nLog:", "Logistic Regression", log_class),
]

for header, label, estimator in cv_models:
    print(header)
    # Run the cross-validation once and reuse the result, so the per-fold
    # scores and the mean printed below always come from the same fits.
    fold_scores = cross_val_score(estimator, data_input, data_output, scoring='accuracy', cv=10)
    print(fold_scores)
    accuracy = fold_scores.mean() * 100  # mean fold accuracy, in percent
    print("Accuracy of {} is: ".format(label), accuracy)


Random Forests: 
[ 1.          0.93333333  1.          0.93333333  0.93333333  0.93333333
  0.86666667  1.          1.          1.        ]
Accuracy of Random Forests is:  95.3333333333


SVM:
[ 1.          0.93333333  1.          1.          1.          0.93333333
  0.93333333  1.          1.          1.        ]
Accuracy of SVM is:  98.0


Log:
[ 1.          1.          1.          0.93333333  0.93333333  0.93333333
  0.8         0.93333333  1.          1.        ]
Accuracy of SVM is:  95.3333333333

In [30]:
# Hold out 33% of the samples for testing. The seed is fixed so the split, and
# the hold-out scores computed from it, are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data_input, data_output, test_size=0.33, random_state=0
)

In [31]:
# Fit the random forest on the training split and display its score on the
# held-out split (fit() returns the estimator, so the calls can be chained).
rf_class.fit(X_train, y_train).score(X_test, y_test)


Out[31]:
0.93999999999999995

In [34]:
# Fit the logistic regression on the training split and display its score on
# the held-out split (fit() returns the estimator, so the calls can be chained).
log_class.fit(X_train, y_train).score(X_test, y_test)


Out[34]:
0.93999999999999995

In [33]:
# Fit the SVM on the training split and display its score on the held-out
# split (fit() returns the estimator, so the calls can be chained).
svm_class.fit(X_train, y_train).score(X_test, y_test)


Out[33]:
0.97999999999999998

In [37]:
# Compare two explicit, seeded splitters over the whole iris data set:
#   kf2 - shuffled 10-fold split
#   cv  - 10 random shuffle splits with 30% of the samples held out each time
# Original author's note: "Shows internal shuffling not that good, for this example".
kf2 = KFold(len(iris_data.data), n_folds=10, shuffle=True, random_state=10)
cv = cross_validation.ShuffleSplit(len(iris_data.data), n_iter=10, test_size=0.3, random_state=0)


def report_mean_accuracy(title, splitter):
    """Print the mean cross-validated accuracy of every classifier.

    Parameters
    ----------
    title : str
        Line printed before the per-classifier results.
    splitter : cross-validation iterator
        Passed to ``cross_val_score`` as ``cv``.
    """
    print(title)
    for label, estimator in (("Random Forest", rf_class),
                             ("SVM", svm_class),
                             ("Logistic", log_class)):
        fold_scores = cross_val_score(estimator, data_input, data_output, cv=splitter, scoring='accuracy')
        # One formatting path for all models so the numbers are directly comparable.
        print("{}: {}".format(label, fold_scores.mean()))


report_mean_accuracy("With kf2 ----", kf2)
report_mean_accuracy("\n With cv", cv)


With kf2 ----
Random Forest: 0.9533333333333334
SVM:  0.98
Logistic:  0.946666666667

 With cv
Random Forest: 0.9577777777777777
SVM:  0.975555555556
Logistic:  0.951111111111

In [ ]: