In [1]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import composition as comp

%matplotlib inline


/home/jbourbeau/.local/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
# Feature column names and their display labels from the composition
# analysis package. NOTE(review): assumed feature_labels is a numpy array
# of LaTeX strings indexable by integer lists (used that way below) — confirm.
feature_list, feature_labels = comp.get_training_features()

In [3]:
# Configuration identifying which saved SFS result files to load below.
pipeline_str = 'xgboost'  # classifier pipeline used in the SFS runs
scoring = 'accuracy'      # cross-validation scoring metric
cv = 10                   # number of cross-validation folds

In [4]:
def _load_sfs_results(direction, floating):
    """Read one saved sequential-feature-selection result CSV.

    Parameters
    ----------
    direction : str
        'forward' or 'backward'.
    floating : str
        'floating' or 'nofloat'.

    Returns
    -------
    pandas.DataFrame
        The SFS results table for the configured pipeline/scoring/cv.
    """
    # File naming convention: <pipeline>_<direction>_<floating>_<scoring>_cv<cv>.csv
    results_file = 'SFS-results/{}_{}_{}_{}_cv{}.csv'.format(
        pipeline_str, direction, floating, scoring, cv)
    return pd.read_csv(results_file)

sfs = _load_sfs_results('forward', 'nofloat')     # sequential forward selection
sffs = _load_sfs_results('forward', 'floating')   # sequential forward floating selection
sbs = _load_sfs_results('backward', 'nofloat')    # sequential backward selection
sfbs = _load_sfs_results('backward', 'floating')  # sequential backward floating selection

In [5]:
# Inspect the forward-selection (SFS) results table: one row per subset
# size, with cross-validated score statistics and the chosen feature indices.
sfs


Out[5]:
Unnamed: 0 avg_score ci_bound cv_scores feature_idx std_dev std_err
0 1 0.615184 0.002381 [ 0.61324942 0.62146406 0.61371315 0.609738... (12,) 0.003206 0.001069
1 2 0.770208 0.002141 [ 0.76442531 0.76787016 0.77250745 0.773964... (9, 12) 0.002882 0.000961
2 3 0.777038 0.002104 [ 0.77310368 0.7736999 0.77793972 0.782378... (9, 12, 5) 0.002833 0.000944
3 4 0.780642 0.002175 [ 0.77575356 0.77913216 0.78032461 0.783040... (9, 12, 5, 4) 0.002928 0.000976
4 5 0.782477 0.002080 [ 0.77827095 0.7783372 0.78403445 0.785226... (9, 2, 12, 5, 4) 0.002800 0.000933
5 6 0.783491 0.002046 [ 0.77873468 0.77972839 0.78363697 0.786684... (1, 2, 4, 5, 9, 12) 0.002754 0.000918
6 7 0.784206 0.002183 [ 0.77899967 0.77999338 0.78482941 0.787810... (1, 2, 4, 5, 9, 12, 14) 0.002940 0.000980
7 8 0.784511 0.001996 [ 0.77966214 0.78078834 0.78376946 0.787810... (1, 2, 4, 5, 9, 11, 12, 14) 0.002687 0.000896
8 9 0.784564 0.002013 [ 0.77966214 0.78025836 0.78575687 0.788075... (0, 1, 2, 4, 5, 9, 11, 12, 14) 0.002710 0.000903
9 10 0.784564 0.002028 [ 0.77959589 0.78025836 0.78575687 0.788141... (0, 1, 2, 4, 5, 6, 9, 11, 12, 14) 0.002731 0.000910
10 11 0.784564 0.002028 [ 0.77959589 0.78025836 0.78575687 0.788141... (0, 1, 2, 4, 5, 6, 9, 11, 12, 13, 14) 0.002731 0.000910
11 12 0.784564 0.002028 [ 0.77959589 0.78025836 0.78575687 0.788141... (0, 1, 2, 4, 5, 6, 9, 11, 12, 13, 14, 15) 0.002731 0.000910
12 13 0.784233 0.002003 [ 0.7793309 0.78045711 0.78476317 0.788141... (0, 1, 2, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15) 0.002696 0.000899
13 14 0.784716 0.002262 [ 0.78019212 0.78138456 0.7852269 0.788605... (0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15) 0.003046 0.001015
14 15 0.784756 0.002156 [ 0.77986088 0.78191454 0.78535939 0.789069... (0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... 0.002902 0.000967
15 16 0.784756 0.002156 [ 0.77986088 0.78191454 0.78535939 0.789069... (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... 0.002902 0.000967

In [6]:
# Compare cross-validated accuracy vs. feature-subset size for each
# selection algorithm on a single figure.
selector_list = [sfs, sffs, sbs, sfbs]
selector_label = ['SFS', 'SFFS', 'SBS', 'SFBS']
fig, ax = plt.subplots()
for label, selector in zip(selector_label, selector_list):
    # Direct column access instead of two full iterrows() passes per frame.
    scores = selector['avg_score'].values
    errs = selector['std_dev'].values
    ax.errorbar(range(1, len(selector) + 1), scores, yerr=errs,
                marker='.', linestyle='-', lw=1, alpha=0.75,
                label=label)
# Use the Axes API consistently rather than mixing in pyplot state calls.
ax.set_xlabel('Feature subset size')
ax.set_ylabel('Accuracy')  # fixed typo: was 'Accurary'
ax.set_title('XGBoost')
ax.grid()
ax.legend(title='Selection algorithm', loc='lower right')
plt.show()


/home/jbourbeau/.local/lib/python2.7/site-packages/matplotlib/figure.py:1742: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "

In [15]:
# Print the sorted feature labels chosen by each algorithm at a fixed
# subset size k_features.
k_features = 8
for label, selector in zip(selector_label, selector_list):
    # 'feature_idx' is stored in the CSV as a stringified tuple, e.g.
    # '(9, 12, 5)' or '(12,)'. ast.literal_eval parses it safely and
    # replaces the previous manual replace()/split() string munging
    # (which also recomputed `indices` twice — duplicate line removed).
    indices = list(ast.literal_eval(selector['feature_idx'][k_features - 1]))
    print(label + ': (k = {})'.format(k_features))
    print(np.sort(feature_labels[indices]))


SFS: (k = 8)
['$\\cos(\\theta_{\\mathrm{Lap}})$' '$\\log_{10}$(NChannels (top 50\\%))'
 '$\\log_{10}$(NHits (top 50\\%))' '$\\log_{10}(S_{\\mathrm{500}})$'
 '$\\log_{10}(S_{\\mathrm{50}})$' '$r\\log_{10}(l)$'
 'Num HE stochastics (standard)' 'dE/dX (standard)']
SFFS: (k = 8)
['$\\cos(\\theta_{\\mathrm{Lap}})$' '$\\log_{10}$(NChannels (top 50\\%))'
 '$\\log_{10}$(NHits (top 50\\%))' '$\\log_{10}(S_{\\mathrm{500}})$'
 '$\\log_{10}(S_{\\mathrm{50}})$' '$r\\log_{10}(l)$'
 'Num HE stochastics (standard)' 'dE/dX (standard)']
SBS: (k = 8)
['$\\cos(\\theta_{\\mathrm{Lap}})$' '$\\log_{10}$(NChannels (top 50\\%))'
 '$\\log_{10}(E_{\\mathrm{Lap}}/\\mathrm{GeV})$'
 '$\\log_{10}(S_{\\mathrm{50}})$' '$r\\log_{10}(l)$' 'NChannels/NHits'
 'Num HE stochastics (standard)' 'dE/dX (standard)']
SFBS: (k = 8)
['$\\cos(\\theta_{\\mathrm{Lap}})$' '$\\log_{10}$(NChannels (top 50\\%))'
 '$\\log_{10}(E_{\\mathrm{Lap}}/\\mathrm{GeV})$'
 '$\\log_{10}(S_{\\mathrm{50}})$' '$r\\log_{10}(l)$' 'NChannels/NHits'
 'Num HE stochastics (standard)' 'dE/dX (standard)']

In [ ]:


In [ ]: