In [1]:
from sklearn.datasets import make_classification
from itertools import chain

import numpy as np

from sklearn.metrics.pairwise import rbf_kernel
from dpp import sample_dpp, decompose_kernel, sample_conditional_dpp
#from dpp_classifier_supervised import class_separability, evaluate_feats
from ogfs_classifier import spec_supervised, evaluate_feats

In [2]:
import pandas as pd

In [3]:
def get_config_indices(dataset_config):
    """
    Takes the configuration used to create the dataset and returns
    the column indices of each feature group.
    
    With shuffle=False, make_classification creates columns in order:
    1.  the first n_informative features are informative
    2.  the next n_redundant features are redundant (linear combinations
        of the informative features)
    3.  the next n_repeated features are duplicates drawn from the
        informative and redundant features
    4.  the rest of the features are useless noise
    """
    config = {}
    config['n_informative'] = list(range(dataset_config['n_informative']))
    config['n_redundant'] = list(range(dataset_config['n_informative'], 
        dataset_config['n_informative'] + dataset_config['n_redundant']))
    config['n_repeated'] = list(range(dataset_config['n_informative'] + dataset_config['n_redundant'], 
        dataset_config['n_informative'] + dataset_config['n_redundant'] + dataset_config['n_repeated']))
    return config
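
# Worked example (illustrative, using the 20-feature config from the runs below):
# get_config_indices({'n_features': 20, 'n_informative': 10,
#                     'n_redundant': 2, 'n_repeated': 2})
# -> {'n_informative': [0, 1, ..., 9], 'n_redundant': [10, 11], 'n_repeated': [12, 13]}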

def salient_info(sel_feats, dataset_config):
    """
    Scores a feature selection against the known dataset structure.
    Returns (saliency, inform_perc, ratio, compact), where n_g is the
    effective number of "good" features recovered:
    
    *  saliency:    n_g/n_r + n_g/n_f - 1, clipped to [-1, 1]
    *  inform_perc: n_g/n_r, the proportion of informative features recovered
    *  ratio:       n_g/|sel_feats|, the precision of the selection
    *  compact:     |sel_feats|, the number of features selected
    """
    n_f = float(dataset_config['n_features'])
    n_r = float(dataset_config['n_informative'])
    
    m_feats = get_config_indices(dataset_config)
    
    n_info_sel = len([feat for feat in m_feats['n_informative'] if feat in sel_feats])
    n_red_sel = len([feat for feat in m_feats['n_redundant'] if feat in sel_feats])
    n_rep_sel = len([feat for feat in m_feats['n_repeated'] if feat in sel_feats])
    
    # determine n_g: repeated features count fully, redundant features count
    # half, and n_g is capped at the number of informative features
    if n_red_sel == 0:
        n_g = min(n_info_sel + n_rep_sel, n_r)
    else:
        n_g = min(n_info_sel + n_rep_sel + (n_red_sel / 2.0), n_r)
    return max(min(1.0, (n_g / n_r) + (n_g / n_f) - 1.0), -1.0), n_g / n_r, n_g / len(sel_feats), len(sel_feats)
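
# Hedged sanity check (demo_config is introduced here for illustration; not
# part of the original run): a perfect selection of the ten informative
# features in the 20-feature config scores saliency = 10/10 + 10/20 - 1 = 0.5
demo_config = {'n_features': 20, 'n_informative': 10,
               'n_redundant': 2, 'n_repeated': 2}
demo_score = salient_info(list(range(10)), demo_config)
# demo_score == (0.5, 1.0, 1.0, 10)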

In [4]:
def wilcoxon_group(X, f):
    """
    Wilcoxon signed-rank test of a candidate feature f against the
    already-selected columns of X. Wilcoxon is a very aggressive selector
    in an unsupervised sense; we probably also require a supervised,
    score-based group selection to pick the "best" features, similar
    to OGFS.
    """
    from scipy.stats import wilcoxon
    # X is a matrix, f is a single vector
    if len(X.shape) == 1:
        return wilcoxon(X, f).pvalue
    # test f against each column and return the largest p-value (the most
    # similar pairing): f is only admitted if it differs significantly
    # from every selected feature
    return np.max([wilcoxon(x, f).pvalue for x in X.T])
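
# Sketch of the aggregation choice (rng, base, near_dup, shifted are
# illustrative names, not from the original run): a location-shifted column
# gives a tiny p-value while a same-location near-duplicate typically gives
# a large one, so taking the max over selected columns admits f only if it
# differs from every one of them.
rng = np.random.RandomState(0)
base = rng.normal(size=(100, 1))
near_dup = base[:, 0] + rng.normal(scale=1e-2, size=100)  # same location: p typically large
shifted = base[:, 0] + 1.0                                # clear shift: p tiny
p_dup, p_shift = wilcoxon_group(base, near_dup), wilcoxon_group(base, shifted)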

In [5]:
def wilcoxon_feats(X, s_b, s_w, alpha=0.1):
    """
    Greedily selects a subset of features: rank by the score s_b/s_w
    (descending) and admit a feature only if the Wilcoxon test says it
    differs from every feature selected so far (p < alpha).
    """
    s_ord = s_b / s_w
    eval_order = np.argsort(s_ord)[::-1]  # best-scoring features first
    selected_idx = []
    for idx in eval_order:
        if len(selected_idx) == 0:
            selected_idx.append(idx)
            continue
        wilcoxon_pval = wilcoxon_group(X[:, selected_idx], X[:, idx].flatten())
        if wilcoxon_pval < alpha:
            selected_idx.append(idx)
    return selected_idx
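
# Illustrative run on pure noise with made-up s_b/s_w scores (not from the
# original experiments): independent columns have symmetric pairwise
# differences, so candidates rarely reach p < alpha and usually only the
# top-ranked column survives, showing how aggressive this filter is.
rng = np.random.RandomState(1)
X_demo = rng.normal(size=(100, 5))
sel_demo = wilcoxon_feats(X_demo, np.array([5., 4., 3., 2., 1.]), np.ones(5), alpha=0.1)
# sel_demo is usually just [0]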

In [6]:
def dpp_sampler(X, y, k=None, mode='dpp'):
    """
    Takes in a dataset and returns a set of feature indices.
    mode='dpp' uses DPP sampling only; 'supervised' and 'unsupervised'
    additionally filter the DPP sample by the respective criterion.
    """
    # similarity kernel over features (columns), not samples
    feat_dist = rbf_kernel(X.T)
    feat_index = sample_dpp(decompose_kernel(feat_dist), k=k)
    
    X_sel = X[:, feat_index]
    s_b, s_w = spec_supervised(X_sel, y)
    col_sel = evaluate_feats(s_b, s_w)
    ul_sel = wilcoxon_feats(X_sel, s_b, s_w, 0.1)
    
    if mode == 'supervised':
        # union of the score-based and Wilcoxon selections
        feat_index = list(np.array(feat_index)[col_sel]) + list(np.array(feat_index)[ul_sel])
        feat_index = list(set(feat_index))
    
    if mode == 'unsupervised':
        feat_index = list(np.array(feat_index)[ul_sel])
    return feat_index

In [7]:
def dpp_sampler_all(X, y, k=None):
    """
    Takes in a dataset and returns a dict of feature-index sets, one per
    criterion: 'dpp' (DPP sample only), 'sl' (score-based + Wilcoxon
    union) and 'ul' (Wilcoxon only).
    """
    all_info = {}
    feat_dist = rbf_kernel(X.T)
    feat_index = sample_dpp(decompose_kernel(feat_dist), k=k)
    all_info['dpp'] = feat_index[:]
    
    X_sel = X[:, feat_index]
    s_b, s_w = spec_supervised(X_sel, y)
    col_sel = evaluate_feats(s_b, s_w)
    ul_sel = wilcoxon_feats(X_sel, s_b, s_w, 0.05)
    
    dpp_idx = np.array(all_info['dpp'])
    all_info['sl'] = list(set(list(dpp_idx[col_sel]) + list(dpp_idx[ul_sel])))
    all_info['ul'] = list(dpp_idx[ul_sel])
    return all_info

In [8]:
dataset_config = {
    'n_features': 20, 
    'n_informative': 10,
    'n_redundant': 2,
    'n_repeated': 2
}
X, y = make_classification(random_state=0, shuffle=False,
                           **dataset_config)
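
# Illustrative single call (selected indices vary between runs because the
# DPP sample is random):
info = dpp_sampler_all(X, y)
# info.keys() -> dict_keys(['dpp', 'sl', 'ul'])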

In [9]:
def single_run(dataset_config, k=None):
    X, y = make_classification(random_state=0, shuffle=False,
                               **dataset_config)
    # guard: retry if the sampler ever returns None
    feats = None
    while feats is None:
        feats = dpp_sampler_all(X, y, k=k)
    metrics = {}
    metrics['dpp'] = salient_info(feats['dpp'], dataset_config)
    metrics['sl'] = salient_info(feats['sl'], dataset_config)
    metrics['ul'] = salient_info(feats['ul'], dataset_config)
    
    # unpack the (saliency, inform_perc, ratio, compact) tuples into a dataframe
    df_info = [
        {'algorithm': alg, 'saliency': metrics[alg][0], 'inform_perc': metrics[alg][1],
         'ratio': metrics[alg][2], 'compact': metrics[alg][3]}
        for alg in ['dpp', 'sl', 'ul']
    ]
    return pd.DataFrame(df_info)

In [10]:
dataset_config = {
    'n_features': 20, 
    'n_informative': 10,
    'n_redundant': 2,
    'n_repeated': 2
}

In [11]:
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[11]:
algorithm compact inform_perc ratio saliency
0 dpp 8.8 0.580 0.650795 -0.1300
1 sl 5.7 0.395 0.705476 -0.4075
2 ul 3.0 0.190 0.633333 -0.7150

In [12]:
dataset_config = {
    'n_features': 200, 
    'n_informative': 10,
    'n_redundant': 2,
    'n_repeated': 2
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[12]:
algorithm compact inform_perc ratio saliency
0 dpp 89.4 0.600 0.067504 -0.37000
1 sl 21.7 0.265 0.409242 -0.72175
2 ul 2.9 0.160 0.550000 -0.83200

In [13]:
dataset_config = {
    'n_features': 200, 
    'n_informative': 20,
    'n_redundant': 5,
    'n_repeated': 5
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


C:\Users\chapm\Anaconda3\envs\skrecipe\lib\site-packages\scipy\stats\morestats.py:2397: UserWarning: Warning: sample size too small for normal approximation.
  warnings.warn("Warning: sample size too small for normal approximation.")
C:\Users\chapm\Anaconda3\envs\skrecipe\lib\site-packages\scipy\stats\morestats.py:2422: RuntimeWarning: invalid value encountered in double_scalars
  z = (T - mn - correction) / se
C:\Users\chapm\Anaconda3\envs\skrecipe\lib\site-packages\scipy\stats\_distn_infrastructure.py:879: RuntimeWarning: invalid value encountered in greater
  return (self.a < x) & (x < self.b)
C:\Users\chapm\Anaconda3\envs\skrecipe\lib\site-packages\scipy\stats\_distn_infrastructure.py:879: RuntimeWarning: invalid value encountered in less
  return (self.a < x) & (x < self.b)
C:\Users\chapm\Anaconda3\envs\skrecipe\lib\site-packages\scipy\stats\_distn_infrastructure.py:1818: RuntimeWarning: invalid value encountered in less_equal
  cond2 = cond0 & (x <= self.a)
Out[13]:
algorithm compact inform_perc ratio saliency
0 dpp 88.6 0.5600 0.125780 -0.38400
1 sl 55.6 0.4175 0.384050 -0.54075
2 ul 2.2 0.0925 0.858333 -0.89825

In [14]:
dataset_config = {
    'n_features': 200, 
    'n_informative': 100,
    'n_redundant': 10,
    'n_repeated': 10
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[14]:
algorithm compact inform_perc ratio saliency
0 dpp 96.8 0.4645 0.481640 -0.30325
1 sl 14.4 0.0770 0.928571 -0.88450
2 ul 1.9 0.0190 1.000000 -0.97150

In [15]:
dataset_config = {
    'n_features': 200, 
    'n_informative': 50,
    'n_redundant': 10,
    'n_repeated': 10
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[15]:
algorithm compact inform_perc ratio saliency
0 dpp 87.5 0.473 0.271185 -0.40875
1 sl 19.9 0.139 0.678467 -0.82625
2 ul 2.9 0.044 0.790833 -0.94500

In [16]:
dataset_config = {
    'n_features': 40, 
    'n_informative': 20,
    'n_redundant': 0,
    'n_repeated': 10, 
    'n_clusters_per_class': 4
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[16]:
algorithm compact inform_perc ratio saliency
0 dpp 16.3 0.54 0.660231 -0.190
1 sl 8.6 0.31 0.782710 -0.535
2 ul 1.8 0.08 0.900000 -0.880

In [17]:
dataset_config = {
    'n_features': 200, 
    'n_informative': 20,
    'n_redundant': 0,
    'n_repeated': 5, 
    'n_clusters_per_class': 4
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[17]:
algorithm compact inform_perc ratio saliency
0 dpp 86.6 0.485 0.111945 -0.4665
1 sl 54.8 0.340 0.320470 -0.6260
2 ul 1.8 0.070 0.800000 -0.9230

Madelon


In [18]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 5,
    'n_redundant': 15,
    'n_repeated': 0, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[18]:
algorithm compact inform_perc ratio saliency
0 dpp 214.4 0.93 0.021708 -0.0607
1 sl 25.7 0.29 0.338046 -0.7071
2 ul 1.3 0.11 0.550000 -0.8889

In [19]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 25,
    'n_redundant': 15,
    'n_repeated': 2, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[19]:
algorithm compact inform_perc ratio saliency
0 dpp 217.0 0.534 0.061414 -0.4393
1 sl 88.9 0.232 0.120928 -0.7564
2 ul 2.6 0.022 0.208333 -0.9769

In [20]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 50,
    'n_redundant': 15,
    'n_repeated': 4, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[20]:
algorithm compact inform_perc ratio saliency
0 dpp 215.0 0.476 0.110674 -0.4764
1 sl 86.9 0.221 0.303620 -0.7569
2 ul 2.2 0.026 0.608333 -0.9714

In [21]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 75,
    'n_redundant': 15,
    'n_repeated': 6, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[21]:
algorithm compact inform_perc ratio saliency
0 dpp 223.6 0.479333 0.161001 -0.448767
1 sl 180.1 0.382000 0.173122 -0.560700
2 ul 3.1 0.018000 0.470833 -0.979300

In [22]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 100,
    'n_redundant': 15,
    'n_repeated': 8, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[22]:
algorithm compact inform_perc ratio saliency
0 dpp 213.7 0.4585 0.213980 -0.4498
1 sl 66.4 0.1445 0.314841 -0.8266
2 ul 3.0 0.0135 0.429167 -0.9838

In [23]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 125,
    'n_redundant': 15,
    'n_repeated': 10, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[23]:
algorithm compact inform_perc ratio saliency
0 dpp 225.0 0.4704 0.261204 -0.4120
1 sl 160.3 0.3396 0.324031 -0.5755
2 ul 2.3 0.0124 0.650000 -0.9845

In [24]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 150,
    'n_redundant': 15,
    'n_repeated': 12, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[24]:
algorithm compact inform_perc ratio saliency
0 dpp 217.6 0.435333 0.300184 -0.434067
1 sl 113.2 0.225000 0.335526 -0.707500
2 ul 3.1 0.009667 0.487500 -0.987433

In [25]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 200,
    'n_redundant': 15,
    'n_repeated': 15, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[25]:
algorithm compact inform_perc ratio saliency
0 dpp 229.5 0.4680 0.407878 -0.3448
1 sl 114.8 0.2320 0.599519 -0.6752
2 ul 2.5 0.0095 0.816667 -0.9867

In [26]:
# Madelon NIPS 2003 setup (see UCI: http://archive.ics.uci.edu/ml/datasets/madelon)
# http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html
dataset_config = {
    'n_features': 500, 
    'n_informative': 250,
    'n_redundant': 15,
    'n_repeated': 15, 
    'n_clusters_per_class': 16
}
feat_info = pd.concat([single_run(dataset_config) for x in range(10)])
feat_info.groupby('algorithm').mean().reset_index()


Out[26]:
algorithm compact inform_perc ratio saliency
0 dpp 235.1 0.4404 0.468434 -0.3394
1 sl 49.3 0.0956 0.526225 -0.8566
2 ul 3.0 0.0064 0.550000 -0.9904
