Import modules from GAFE


In [1]:
#Implements functional expansions
from functions.FE import FE
#Evaluates accuracy in a dataset for a particular classifier
from fitness import Classifier
#Implements gafe using DEAP toolbox
import ga

Import modules from scikit-learn, numpy and pandas to help us deal with the data


In [2]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

Load the data using pandas. We will use the famous Iris dataset.


In [3]:
#Read the Iris data set from a comma-separated file
iris = pd.read_csv("data/iris.data", sep=",")
#Isolate the attribute columns. axis is passed by keyword: the positional form
#drop("class", 1) was deprecated in pandas 1.3 and removed in pandas 2.0.
irisAtts = iris.drop("class", axis=1)
#Isolate the class column
target = iris["class"]

Prior to expanding the data, scale all values to the interval [0, 1] for better results.


In [4]:
#Rescale every attribute column to the interval [0, 1] before the expansion step
scaledIris = MinMaxScaler(feature_range=(0, 1)).fit_transform(irisAtts)

If we didn't use GAFE, then after testing all 49 (7*7) FE-ES combinations, the configuration below would be the best one for each classifier. Note that we apply the same FE-ES pair to every data column.


In [5]:
#Best single FE-ES pair found for each classifier, repeated once per attribute column (4 columns)
bestSingleMatch = {
    'knn': [(1, 5)] * 4,
    'cart': [(3, 2)] * 4,
    'svm': [(7, 4)] * 4,
}

Now let's calculate the accuracy results for the original data, the single match expansion and GAFE.


In [6]:
functionalExp = FE()


def _report(label, classifierName, value):
    #Print one result line in the form "<label> <classifier> <value>"
    print(" ".join((label, classifierName, str(value))))


for cl in ('knn', 'cart', 'svm'):
    #folds: number of folds used in cross-validation.
    #jobs: number of CPUs used in cross-validation and in some classifiers' training step.
    #Other classifier parameters can also be given (e.g. k_neigh for knn, C for svm);
    #anything left unspecified uses the article's defaults.
    model = Classifier(cl, target, folds=10, jobs=6)

    #Baseline: accuracy without expansion. The class normalizes data internally,
    #so the unscaled attributes are passed here.
    _report("original accuracy", cl, model.getAccuracy(irisAtts))

    #Single match: expand the scaled data with the best single FE-ES pair for this classifier
    expandedData = functionalExp.expandMatrix(scaledIris, bestSingleMatch[cl])
    _report("single match expansion accuracy", cl, model.getAccuracy(expandedData))

    #GAFE: scaled=True because the data passed in is already scaled;
    #with scaled=False GAFE scales the data to the range [0,1] itself.
    gafe = ga.GAFE(model, scaledIris, target, scaled=True)
    #n_iter sets how many GAFE iterations are run. The method is slow,
    #so be patient when n_iter is high.
    avg, bestPair = gafe.runGAFE(n_population=21, n_iter=1, verbose=True)
    _report("gafe", cl, avg)


original accuracy knn 0.96
single match expansion accuracy knn 0.966666666667
GAFE iteration = 1
gen	nevals	avg     	min     	max     
0  	21    	0.949524	0.893333	0.966667
1  	16    	0.951111	0.866667	0.973333
2  	18    	0.960317	0.946667	0.973333
3  	18    	0.96127 	0.933333	0.966667
4  	18    	0.958095	0.946667	0.966667
5  	16    	0.96381 	0.946667	0.986667
6  	18    	0.969206	0.953333	0.986667
7  	18    	0.974603	0.946667	0.986667
8  	19    	0.98254 	0.953333	0.986667
9  	16    	0.98381 	0.953333	0.986667
10 	20    	0.984762	0.946667	0.986667
11 	19    	0.984127	0.933333	0.986667
12 	21    	0.985714	0.966667	0.986667
13 	18    	0.986349	0.98    	0.986667
14 	19    	0.981587	0.953333	0.986667
15 	21    	0.985714	0.966667	0.986667
16 	18    	0.986667	0.986667	0.986667
17 	20    	0.985714	0.966667	0.986667
18 	14    	0.986667	0.986667	0.986667
19 	18    	0.97746 	0.92    	0.986667
20 	18    	0.985397	0.96    	0.986667
21 	18    	0.983492	0.966667	0.986667
22 	16    	0.980635	0.94    	0.986667
23 	20    	0.985079	0.96    	0.986667
24 	18    	0.986667	0.986667	0.986667
25 	17    	0.986667	0.986667	0.986667
26 	20    	0.986667	0.986667	0.986667
27 	18    	0.984127	0.933333	0.986667
28 	18    	0.985397	0.96    	0.986667
29 	20    	0.986032	0.973333	0.986667
30 	17    	0.984444	0.953333	0.986667
31 	20    	0.984762	0.96    	0.986667
32 	20    	0.985079	0.96    	0.986667
33 	18    	0.984762	0.96    	0.986667
34 	20    	0.977143	0.9     	0.986667
35 	20    	0.986667	0.986667	0.986667
36 	20    	0.985714	0.966667	0.986667
37 	19    	0.985079	0.953333	0.986667
38 	18    	0.986667	0.986667	0.986667
39 	18    	0.984762	0.96    	0.986667
40 	18    	0.978413	0.92    	0.986667
Accuracy for GAFE iteration is 0.986666666667
gafe knn 0.986666666667
original accuracy cart 0.966666666667
single match expansion accuracy cart 0.973333333333
GAFE iteration = 1
gen	nevals	avg     	min     	max 
0  	21    	0.952063	0.926667	0.98
1  	18    	0.952381	0.913333	0.98
2  	18    	0.96254 	0.933333	0.98
3  	18    	0.968571	0.933333	0.98
4  	18    	0.970794	0.92    	0.986667
5  	21    	0.975873	0.953333	0.986667
6  	18    	0.980635	0.966667	0.986667
7  	20    	0.980635	0.946667	0.986667
8  	18    	0.981587	0.96    	0.986667
9  	18    	0.981905	0.94    	0.986667
10 	18    	0.985079	0.953333	0.986667
11 	16    	0.986667	0.986667	0.986667
12 	20    	0.985714	0.966667	0.986667
13 	18    	0.985714	0.966667	0.986667
14 	18    	0.984444	0.94    	0.986667
15 	20    	0.986667	0.986667	0.986667
16 	20    	0.986667	0.986667	0.986667
17 	19    	0.985714	0.966667	0.986667
18 	18    	0.985079	0.953333	0.986667
19 	16    	0.986667	0.986667	0.986667
20 	18    	0.986349	0.98    	0.986667
21 	16    	0.98381 	0.946667	0.986667
22 	21    	0.986667	0.986667	0.986667
23 	19    	0.984444	0.94    	0.986667
24 	20    	0.986032	0.973333	0.986667
25 	17    	0.983492	0.94    	0.986667
26 	18    	0.985079	0.953333	0.986667
27 	18    	0.982222	0.893333	0.986667
28 	19    	0.984127	0.953333	0.986667
29 	18    	0.985079	0.96    	0.986667
30 	16    	0.986349	0.98    	0.986667
31 	16    	0.986032	0.973333	0.986667
32 	20    	0.984444	0.94    	0.986667
33 	17    	0.983492	0.94    	0.986667
34 	18    	0.984444	0.96    	0.986667
35 	20    	0.984444	0.953333	0.986667
36 	19    	0.985079	0.953333	0.986667
37 	18    	0.986667	0.986667	0.986667
38 	18    	0.984127	0.953333	0.986667
39 	18    	0.979683	0.933333	0.986667
40 	19    	0.979365	0.933333	0.986667
Accuracy for GAFE iteration is 0.986666666667
gafe cart 0.986666666667
original accuracy svm 0.96
single match expansion accuracy svm 0.98
GAFE iteration = 1
gen	nevals	avg    	min     	max 
0  	21    	0.95873	0.926667	0.98
1  	18    	0.964444	0.946667	0.98
2  	19    	0.968889	0.953333	0.98
3  	20    	0.978413	0.966667	0.98
4  	18    	0.979048	0.966667	0.986667
5  	17    	0.979365	0.973333	0.986667
6  	20    	0.97873 	0.953333	0.986667
7  	20    	0.98127 	0.966667	0.986667
8  	18    	0.984762	0.973333	0.993333
9  	16    	0.984444	0.973333	0.993333
10 	19    	0.985714	0.973333	0.993333
11 	18    	0.986984	0.98    	0.993333
12 	20    	0.986349	0.973333	0.993333
13 	17    	0.983175	0.953333	0.993333
14 	17    	0.987937	0.973333	0.993333
15 	20    	0.990476	0.973333	0.993333
16 	17    	0.993016	0.986667	0.993333
17 	20    	0.992698	0.98    	0.993333
18 	20    	0.993333	0.993333	0.993333
19 	17    	0.991746	0.96    	0.993333
20 	16    	0.993333	0.993333	0.993333
21 	16    	0.992698	0.98    	0.993333
22 	19    	0.991746	0.973333	0.993333
23 	21    	0.991429	0.953333	0.993333
24 	18    	0.992063	0.966667	0.993333
25 	20    	0.992698	0.98    	0.993333
26 	16    	0.993333	0.993333	0.993333
27 	21    	0.992063	0.966667	0.993333
28 	16    	0.989841	0.94    	0.993333
29 	20    	0.989524	0.946667	0.993333
30 	20    	0.993333	0.993333	0.993333
31 	18    	0.993016	0.986667	0.993333
32 	21    	0.989524	0.946667	0.993333
33 	19    	0.990476	0.96    	0.993333
34 	18    	0.993333	0.993333	0.993333
35 	20    	0.992698	0.98    	0.993333
36 	18    	0.992698	0.98    	0.993333
37 	16    	0.991111	0.966667	0.993333
38 	19    	0.988571	0.946667	0.993333
39 	19    	0.992698	0.98    	0.993333
40 	18    	0.993333	0.993333	0.993333
Accuracy for GAFE iteration is 0.993333333333
gafe svm 0.993333333333