In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from argparse import Namespace
import misc.logging_utils as logging_utils

# Empty argparse Namespace used as a plain attribute container for notebook
# settings; later cells attach values to it (e.g. `args.training_scenarios_dir`).
args = Namespace()
# Logger obtained from the project's logging helper.
# NOTE(review): presumably this configures log output for the notebook (the
# "INFO     : ..." lines shown under later cells) -- confirm in misc.logging_utils.
logger = logging_utils.get_ipython_logger()

In [6]:
import os

import misc.automl_utils as automl_utils

# Directory holding one sub-directory per ASlib training scenario. The
# location is machine-specific, so it can be overridden through the
# OASC_TRAINING_SCENARIOS_DIR environment variable; when the variable is unset
# or empty, the original hard-coded path is used.
args.training_scenarios_dir = (
    os.environ.get("OASC_TRAINING_SCENARIOS_DIR") or "/mldb/oasc_scenarios/train/"
)

# Load every scenario found under the directory. The result is consumed via
# `.values()` in a later cell, so it is presumably a dict keyed by scenario
# name -- TODO confirm against automl_utils.load_all_scenarios.
training_scenarios = automl_utils.load_all_scenarios(args.training_scenarios_dir)


INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Bado
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Bado/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Bado/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Bado/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Camilla
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Camilla/ground_truth.arff
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Camilla/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Camilla/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
DEBUG    : Since we optimize quality, we use runtime cutoff of 1.
INFO     : Read /mldb/oasc_scenarios/train/Camilla/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Camilla/feature_values.arff
WARNING  : Found 3 duplicated feature vectors
WARNING  : Index(['instance_12', 'instance_15', 'instance_82', 'instance_83',
       'instance_85'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Camilla/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Camilla/cv.arff
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Caren
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Caren/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Caren/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Caren/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Caren/feature_values.arff
WARNING  : Found 3 duplicated feature vectors
WARNING  : Index(['instance_12', 'instance_15', 'instance_82', 'instance_83',
       'instance_85'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Caren/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Caren/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Caren/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Magnus
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Magnus/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Magnus/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Magnus/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Magnus/feature_values.arff
WARNING  : Found 20 duplicated feature vectors
WARNING  : Index(['instance_11', 'instance_18', 'instance_498', 'instance_500',
       'instance_501', 'instance_502', 'instance_504', 'instance_51',
       'instance_513', 'instance_514', 'instance_517', 'instance_518',
       'instance_53', 'instance_54', 'instance_55', 'instance_559',
       'instance_56', 'instance_568', 'instance_57', 'instance_571',
       'instance_579', 'instance_58', 'instance_59', 'instance_590',
       'instance_592', 'instance_598', 'instance_600', 'instance_63',
       'instance_66', 'instance_70', 'instance_71', 'instance_72',
       'instance_73', 'instance_74'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Magnus/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Magnus/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Magnus/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Mira
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Mira/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Mira/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Mira/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Mira/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Mira/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Mira/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Mira/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Monty
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Monty/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Monty/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Monty/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Monty/feature_values.arff
WARNING  : Found 14 duplicated feature vectors
WARNING  : Index(['instance_311', 'instance_324', 'instance_47', 'instance_51',
       'instance_532', 'instance_545', 'instance_548', 'instance_553',
       'instance_561', 'instance_566', 'instance_582', 'instance_594',
       'instance_598', 'instance_599', 'instance_601', 'instance_603',
       'instance_613', 'instance_617', 'instance_618', 'instance_620',
       'instance_621', 'instance_624', 'instance_627', 'instance_629',
       'instance_630'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Monty/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Monty/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Monty/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Oberon
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Oberon/ground_truth.arff
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Oberon/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Oberon/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_time
DEBUG    : Assumption FEATURES_CUTOFF_TIME == ALGORITHM_CUTOFF_TIME 
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
DEBUG    : Since we optimize quality, we use runtime cutoff of 1.
INFO     : Read /mldb/oasc_scenarios/train/Oberon/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Oberon/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Oberon/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Oberon/cv.arff
INFO     : Multiply all performance data by -1, since autofolio minimizes the scores but the objective is to maximize
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Quill
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Quill/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Quill/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Quill/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Quill/feature_values.arff
WARNING  : Found 11 duplicated feature vectors
WARNING  : Index(['instance_135', 'instance_169', 'instance_196', 'instance_243',
       'instance_255', 'instance_276', 'instance_368', 'instance_488',
       'instance_620', 'instance_630', 'instance_666', 'instance_678',
       'instance_696', 'instance_764'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Quill/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Quill/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Quill/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Sora
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Sora/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Sora/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Sora/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Svea
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Svea/ground_truth.arff
INFO     : Read /mldb/oasc_scenarios/train/Svea/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
INFO     : Read /mldb/oasc_scenarios/train/Svea/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Svea/feature_values.arff
WARNING  : Found 12 duplicated feature vectors
WARNING  : Index(['instance_1023', 'instance_1205', 'instance_1207', 'instance_1475',
       'instance_1488', 'instance_170', 'instance_364', 'instance_412',
       'instance_47', 'instance_613', 'instance_741', 'instance_849',
       'instance_901', 'instance_914'],
      dtype='object', name='instance_id')
INFO     : Read /mldb/oasc_scenarios/train/Svea/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Svea/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Svea/cv.arff
DEBUG    : Replace all runtime data with PAR10 values for non-OK runs
INFO     : Read ASlib scenario: /mldb/oasc_scenarios/train/Titus
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Titus/ground_truth.arff
WARNING  : Optional file not found: /mldb/oasc_scenarios/train/Titus/feature_costs.arff
INFO     : Read /mldb/oasc_scenarios/train/Titus/description.txt
WARNING  : Have not found algorithm_cutoff_memory
WARNING  : Have not found features_cutoff_memory
WARNING  : Have not found features_stochastic
DEBUG    : Since we optimize quality, we use runtime cutoff of 1.
INFO     : Read /mldb/oasc_scenarios/train/Titus/algorithm_runs.arff
INFO     : Read /mldb/oasc_scenarios/train/Titus/feature_values.arff
INFO     : Read /mldb/oasc_scenarios/train/Titus/feature_runstatus.arff
INFO     : Read /mldb/oasc_scenarios/train/Titus/cv.arff
INFO     : Multiply all performance data by -1, since autofolio minimizes the scores but the objective is to maximize

In [5]:
# Build cross-validation splits for each loaded training scenario.
# NOTE(review): the saved execution counts show this cell was last run before
# the loading cell above was re-run; use "Restart Kernel -> Run All" so the
# splits are created on the scenarios currently held in `training_scenarios`.
# NOTE(review): create_cv_splits is called only for its effect (its return
# value is discarded) -- presumably it updates the scenario in place; confirm.
for scenario in training_scenarios.values():
    automl_utils.create_cv_splits(scenario)

In [ ]: