In [1]:
from sklearn import datasets
from polyssifier import poly
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons, make_classification
import matplotlib.pyplot as plt 
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [3]:
data, label = make_moons(n_samples=500, noise=0.4)
print(data.shape)
print(label.shape)

plt.plot(data[label == 0, 0], data[label == 0, 1], '.', alpha=.5)
plt.plot(data[label == 1, 0], data[label == 1, 1], 'r.', alpha=.5)
report = poly(data, label, n_folds=3, verbose=True, save=False, scale=True,
              feature_selection=False, scoring='auc', concurrency=1)
report.plot_scores()


(500, 2)
(500,)
                                 mean       std       min       max
Multilayer Perceptron train  0.945326  0.003201  0.941682  0.947685
                      test   0.927588  0.008685  0.920743  0.937358
Nearest Neighbors     train  0.941582  0.005668  0.936547  0.947721
                      test   0.919061  0.011822  0.911453  0.932681
SVM                   train  0.917366  0.011477  0.910655  0.930618
                      test   0.903057  0.002547  0.900131  0.904776
Linear SVM            train  0.902608  0.003627  0.899100  0.906343
                      test   0.903144  0.006267  0.897227  0.909711
Decision Tree         train  1.000000  0.000000  1.000000  1.000000
                      test   0.777897  0.032787  0.740964  0.803571
Random Forest         train  0.999731  0.000234  0.999570  1.000000
                      test   0.886372  0.012659  0.872478  0.897251
Logistic Regression   train  0.902572  0.003588  0.899064  0.906235
                      test   0.902028  0.007877  0.893744  0.909421
Naive Bayes           train  0.902736  0.003232  0.900355  0.906415
                      test   0.901061  0.005643  0.894760  0.905647
Voting                train  0.861987  0.021716  0.844311  0.886228
                      test   0.821954  0.013370  0.807229  0.833333
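
The summary already flags overfitting: Decision Tree and Random Forest score (near-)perfect AUC on the training folds but fall well behind on the test folds. A minimal sketch to quantify that gap from report.scores (shown in the next cell; it assumes the two-level classifier/train-test column index that Out[4] displays):

In [ ]:
# Mean test AUC per classifier, plus the train-test gap as a rough
# overfitting indicator.
train = report.scores.xs('train', axis=1, level=1).mean()
test = report.scores.xs('test', axis=1, level=1).mean()
print(test.sort_values(ascending=False))
print((train - test).sort_values(ascending=False))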

In [4]:
report.scores


Out[4]:
                          fold 0              fold 1              fold 2
                       train     test      train     test      train     test
Multilayer Perceptron  0.941682  0.937358  0.947685  0.920743  0.946610  0.924663
Nearest Neighbors      0.936547  0.932681  0.940478  0.913050  0.947721  0.911453
SVM                    0.910655  0.904266  0.930618  0.900131  0.910825  0.904776
Linear SVM             0.902381  0.902494  0.899100  0.909711  0.906343  0.897227
Decision Tree          1.000000  0.803571  1.000000  0.789157  1.000000  0.740964
Random Forest          1.000000  0.897251  0.999624  0.889389  0.999570  0.872478
Logistic Regression    0.902417  0.902920  0.899064  0.909421  0.906235  0.893744
Naive Bayes            0.901437  0.902778  0.900355  0.905647  0.906415  0.894760
Voting                 0.855422  0.833333  0.886228  0.825301  0.844311  0.807229

In [8]:
plt.figure(figsize=(20, 10))
for n, (key, pred) in enumerate(report.predictions.items()):
    plt.subplot(2, 5, n + 1)
    class0 = np.where(pred == 0)[0]  # samples predicted as class 0
    class1 = np.where(pred == 1)[0]  # samples predicted as class 1
    plt.plot(data[class1, 0], data[class1, 1], 'r.', alpha=0.8)
    plt.plot(data[class0, 0], data[class0, 1], 'b.', alpha=0.8)
    plt.legend(['class 1', 'class 0'])
    plt.title('{0} ({1:.2f}%)'.format(key, 100 * np.mean(report.scores[key]['test'])))
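
The panels above color each point by its predicted class. To see where each model actually fails, compare the cross-validated predictions against the true labels (a sketch; it assumes each entry of report.predictions aligns sample-for-sample with data and label):

In [ ]:
plt.figure(figsize=(20, 10))
for n, (key, pred) in enumerate(report.predictions.items()):
    plt.subplot(2, 5, n + 1)
    wrong = np.asarray(pred) != label  # cross-validated errors
    plt.plot(data[~wrong, 0], data[~wrong, 1], 'g.', alpha=0.3)
    plt.plot(data[wrong, 0], data[wrong, 1], 'kx', alpha=0.8)
    plt.legend(['correct', 'misclassified'])
    plt.title('{0}: {1} errors'.format(key, wrong.sum()))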



In [22]:
data, label = make_classification(n_samples=500, n_features=20,
                                  n_informative=5, n_redundant=2,
                                  n_repeated=0, n_classes=2,
                                  n_clusters_per_class=2, weights=None,
                                  flip_y=0.01, class_sep=1.0,
                                  hypercube=True, shift=0.0,
                                  scale=1.0, shuffle=True,
                                  random_state=None)
report = poly(data, label, n_folds=3, verbose=True,
              feature_selection=False, save=False, concurrency=1)
report.plot_scores()


                                 mean       std       min       max
Multilayer Perceptron train  1.000000  0.000000  1.000000  1.000000
                      test   0.948804  0.013261  0.933735  0.958692
Nearest Neighbors     train  0.954424  0.008247  0.945191  0.961060
                      test   0.921777  0.028079  0.889917  0.942915
SVM                   train  0.997144  0.000153  0.996988  0.997295
                      test   0.942016  0.013340  0.928571  0.955250
Linear SVM            train  0.948698  0.008763  0.939651  0.957146
                      test   0.932068  0.013884  0.919679  0.947074
Decision Tree         train  1.000000  0.000000  1.000000  1.000000
                      test   0.792001  0.014543  0.778256  0.807229
Random Forest         train  0.999892  0.000172  0.999693  1.000000
                      test   0.948124  0.013647  0.932444  0.957323
Logistic Regression   train  0.950606  0.010007  0.940300  0.960284
                      test   0.930293  0.015136  0.916523  0.946500
Naive Bayes           train  0.967118  0.005045  0.962063  0.972152
                      test   0.947105  0.011460  0.937177  0.959646
Voting                train  0.955847  0.006256  0.948831  0.960843
                      test   0.883773  0.007123  0.879518  0.891997

In [23]:
report.plot_features()


2017-05-05 09:55:01,655 - Report - INFO - Plotting Linear SVM coefs to temp_Linear SVM_feature_ranking.png
2017-05-05 09:55:02,022 - Report - INFO - Plotting Decision Tree coefs to temp_Decision Tree_feature_ranking.png
2017-05-05 09:55:02,373 - Report - INFO - Plotting Random Forest coefs to temp_Random Forest_feature_ranking.png
2017-05-05 09:55:02,718 - Report - INFO - Plotting Logistic Regression coefs to temp_Logistic Regression_feature_ranking.png
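
Only four ranking plots are written, presumably because only the linear and tree models expose a simple coefficient or importance vector. The Random Forest panel can be cross-checked outside polyssifier with plain scikit-learn (a sketch, not polyssifier's internal procedure):

In [ ]:
from sklearn.ensemble import RandomForestClassifier

# Fit a forest on the full data set and rank the 20 features by
# impurity-based importance.
rf = RandomForestClassifier(n_estimators=100).fit(data, label)
for i in np.argsort(rf.feature_importances_)[::-1][:5]:
    print('feature {0}: importance {1:.3f}'.format(i, rf.feature_importances_[i]))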

In [21]:
# `fs` presumably holds per-fold coefficients from an earlier run with
# feature_selection=True; that cell is not shown in this notebook.
fs['Linear SVM']


Out[21]:
array([[ 0.99237181,  0.86387006, -0.09422622,  0.36759695],
       [ 0.56995341,  0.35415505, -0.2165773 ,  0.04691248],
       [ 0.35034115,  0.20246074, -0.22229275, -0.11547062]])

In [11]:
for key, val in report.confusions.items():
    print(key)
    print(val)


Multilayer Perceptron
[[ 219.   29.]
 [  31.  221.]]
Nearest Neighbors
[[ 217.   31.]
 [  38.  214.]]
SVM
[[ 221.   27.]
 [  38.  214.]]
Linear SVM
[[ 197.   51.]
 [  42.  210.]]
Decision Tree
[[ 185.   63.]
 [  56.  196.]]
Random Forest
[[ 208.   40.]
 [  40.  212.]]
Logistic Regression
[[ 197.   51.]
 [  45.  207.]]
Naive Bayes
[[ 197.   51.]
 [  51.  201.]]
Voting
[[ 224.   24.]
 [  45.  207.]]
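
Each 2x2 matrix condenses to a few familiar metrics. A short sketch, assuming the scikit-learn convention of rows = true class and columns = predicted class:

In [ ]:
# Accuracy, sensitivity (recall on class 1) and specificity from each
# confusion matrix.
for key, cm in report.confusions.items():
    (tn, fp), (fn, tp) = cm
    print('{0}: acc={1:.3f}  sens={2:.3f}  spec={3:.3f}'.format(
        key, (tp + tn) / cm.sum(), tp / (tp + fn), tn / (tn + fp)))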

In [13]:
report.test_proba.head()


Out[13]:
   Multilayer Perceptron  Nearest Neighbors       SVM  Linear SVM  Decision Tree  Random Forest  Logistic Regression  Naive Bayes  Voting
0               0.000005                0.6  0.371396    0.289353            1.0            0.4             0.306328     0.481176     1.0
1               0.999995                0.4  0.628604    0.266117            0.0            0.6             0.693672     0.518824     1.0
2               0.000046                0.2  0.081778   -2.392563            0.0            0.0             0.323797     0.293630     0.0
3               0.999954                0.8  0.918222   -0.211439            1.0            1.0             0.676203     0.706370     0.0
4               1.000000                1.0  0.760359    0.694488            0.0            0.6             0.998923     0.999957     1.0
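
These per-sample scores are enough to draw full ROC curves with scikit-learn. A sketch, assuming the rows of report.test_proba align with label; note that the Linear SVM column holds decision-function values rather than probabilities (hence the negative entries), which roc_curve accepts just as well:

In [ ]:
from sklearn.metrics import roc_curve, auc

plt.figure(figsize=(8, 6))
for key in report.test_proba.columns:
    fpr, tpr, _ = roc_curve(label, report.test_proba[key])
    plt.plot(fpr, tpr, label='{0} (AUC={1:.2f})'.format(key, auc(fpr, tpr)))
plt.plot([0, 1], [0, 1], 'k--')  # chance line
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right');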

In [ ]: