Our results added the following to the DKT:

  1. shuffled train students before each train epoch
  2. shuffled test students before each test epoch
  3. did not truncate students after 10-th; instead, used them all in various ways
  4. fully used all the students in a batch

and the results turned out to be totally different.

Compared to the original paper result, ...

09-10 (a): 1111 + AUC 0.81 + r2 0.29 >>>>>>> turned out to be AUC 0.761 + r2 0.202
09-10 (b): 0111 + AUC 0.82 + r2 0.31 >>>>>>> turned out to be AUC 0.759 + r2 0.198
09-10 (c): 0110 + joint skill + AUC 0.75 + r2 0.18 >>>>>>> not yet applied for our project


In [1]:
import csv
import pandas as pd
import numpy as np
import unicodedata
from RNN_Datapreprocessor import RNN_Datapreprocessor as runner
from itertools import zip_longest
import os

In [2]:
def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        pass
    
    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    
    return False

In [3]:
def index2d(list2d, value):
    return next((i, j) for i, lst in enumerate(list2d) 
                for j, x in enumerate(lst) if x == value)

In [4]:
def read_log_results(datapath, encoding='iso-8859-1'):
    if (not os.path.exists(datapath)):
        return None, None, None
    rmse = [[], []]
    auc = [[], []]
    r2 = [[], []]
    
    with open(datapath) as logfile:
        FLAG = None
        for line in logfile:
            contents = line.split()
            if (0 == len(contents)):
                continue
            
            if ('Train' in contents):
                FLAG = True
            elif ('Test' in contents):
                FLAG = False
            else:
                test_or_train_i = int(FLAG)
                
                for content_i in range(len(contents)):
                    if ('rmse' in contents[content_i]):
                        rmse_i = content_i
                    elif ('auc' in contents[content_i]):
                        auc_i = content_i
                    elif ('r2' in contents[content_i]):
                        r2_i = content_i
                
                for content_i in range(rmse_i, auc_i):
                    if (is_number(contents[content_i])):
                        rmse[test_or_train_i].append(float(contents[content_i]))
                        break
                
                for content_i in range(auc_i, r2_i):
                    if (is_number(contents[content_i])):
                        auc[test_or_train_i].append(float(contents[content_i]))
                        break
                
                for content_i in range(r2_i):
                    if (is_number(contents[content_i])):
                        r2[test_or_train_i].append(float(contents[content_i]))
                        break
        
        if (0 == len(auc[0])):
            return None, None, None

        max_test_auc_i = np.argmax(a=auc[0])

        if (max_test_auc_i + 1 == len(auc[0])):
            print('this network might not have converged')
            print('printing the datapath')
            print(datapath)
            print()
    
    return rmse, auc, r2

In [5]:
# make datapreprocessor config
datapreprocessor_config = {
    'dataset': 'Assistments',
    'version': '2009'
}

In [6]:
def read_one_hot_default_all_results(run_config, split_rate=20):    
    rmse_all = []
    auc_all = []
    r2_all = []
    for config_index in range(16):
        binary_index = format(config_index, '04b')
        config_arr = []

        for i in binary_index:
            i_int = int(i)
            i_bool = bool(i_int)
            config_arr.append(i_bool)

        data_config = {
            'split_rate': split_rate,
            'method': 'default',
            'has_scaffolding': config_arr[0],
            'count_no_skill_id': config_arr[1],
            'has_test_mode': config_arr[2],
            'allow_multi_skills': config_arr[3],
            'one_hot': True
        }

        rnn_runner = runner()
        rnn_runner.datapreprocessor = rnn_runner.load_datapreprocessor(datapreprocessor_config)
        rnn_runner.datapreprocessor.set_config(data_config)
        rnn_runner.data_config = data_config
        log_path = rnn_runner.get_result_path(run_config=run_config)
        
        rmse, auc, r2 = read_log_results(datapath=log_path)
        
        if (None == rmse or None == auc or None == r2):
            print('one_hot_default_all incomplete from ', config_index)
            print()
            continue
        
        supposed_num_epochs = int(run_config.get('num_epochs', 30))
        test_epoch_length_alright = (len(rmse[0]) 
                                     == len(auc[0]) 
                                     == len(r2[0]) 
                                     == (supposed_num_epochs / 5))
        train_epoch_length_alright = (len(rmse[1]) 
                                     == len(auc[1]) 
                                     == len(r2[1]) 
                                     == supposed_num_epochs)
        
        if (not test_epoch_length_alright):
            print('test_epoch_length error; printing log_path')
            print(log_path)
            #print('len(rmse[0]): ', len(rmse[0]))
            #print('len(auc[0]): ', len(auc[0]))
            #print('len(r2[0]): ', len(r2[0]))
            #print('supposed_num_epochs / 5: ', supposed_num_epochs / 5)
            print('one_hot_default_all incomplete from ', config_index)
            print()
            continue
        
        if (not train_epoch_length_alright):
            print('train_epoch_length error; printing log_path')
            print(log_path)
            #print('len(rmse[1]): ', len(rmse[1]))
            #print('len(auc[1]): ', len(auc[1]))
            #print('len(r2[1]): ', len(r2[1]))
            #print('supposed_num_epochs: ', supposed_num_epochs)
            print('one_hot_default_all incomplete from ', config_index)
            print()
            continue

        rmse_all.append(rmse)
        auc_all.append(auc)
        r2_all.append(r2)
    
    rmse_all = [list(x) for x in zip_longest(*rmse_all)]
    auc_all = [list(x) for x in zip_longest(*auc_all)]
    r2_all = [list(x) for x in zip_longest(*r2_all)]

    return rmse_all, auc_all, r2_all

In [7]:
def read_one_hot_sliding_window_all_results(run_config, split_rate=0.2):
    all_config_data = {}
    
    test_formats = ['overlapping_last_element', 'same_as_training', 'partition', 'default']
    
    for format_i in range(len(test_formats)):
        rmse_all = []
        auc_all = []
        r2_all = []
        for config_index in range(16):
            binary_index = format(config_index, '04b')
            config_arr = []

            for i in binary_index:
                i_int = int(i)
                i_bool = bool(i_int)
                config_arr.append(i_bool)

            data_config = {
                'split_rate': split_rate,
                'method': 'sliding_window',
                'has_scaffolding': config_arr[0],
                'count_no_skill_id': config_arr[1],
                'has_test_mode': config_arr[2],
                'allow_multi_skills': config_arr[3],
                'window_length': 10,
                'test_format': test_formats[format_i],
                'one_hot': True
            }

            rnn_runner = runner()
            rnn_runner.datapreprocessor = rnn_runner.load_datapreprocessor(datapreprocessor_config)
            rnn_runner.datapreprocessor.set_config(data_config)
            rnn_runner.data_config = data_config
            log_path = rnn_runner.get_result_path(run_config=run_config)
            
            rmse, auc, r2 = read_log_results(datapath=log_path)
            
            if (None == rmse or None == auc or None == r2):
                print('one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue
            
            supposed_num_epochs = int(run_config.get('num_epochs', 30))
            test_epoch_length_alright = (len(rmse[0]) 
                                         == len(auc[0]) 
                                         == len(r2[0]) 
                                         == (supposed_num_epochs / 5))
            train_epoch_length_alright = (len(rmse[1]) 
                                         == len(auc[1]) 
                                         == len(r2[1]) 
                                         == supposed_num_epochs)

            if (not test_epoch_length_alright):
                print('test_epoch_length error; printing log_path')
                print(log_path)
                #print('len(rmse[0]): ', len(rmse[0]))
                #print('len(auc[0]): ', len(auc[0]))
                #print('len(r2[0]): ', len(r2[0]))
                #print('supposed_num_epochs / 5: ', supposed_num_epochs / 5)
                print('one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue

            if (not train_epoch_length_alright):
                print('train_epoch_length error; printing log_path')
                print(log_path)
                #print('len(rmse[1]): ', len(rmse[1]))
                #print('len(auc[1]): ', len(auc[1]))
                #print('len(r2[1]): ', len(r2[1]))
                #print('supposed_num_epochs: ', supposed_num_epochs)
                print('one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue

            rmse_all.append(rmse)
            auc_all.append(auc)
            r2_all.append(r2)
        
        rmse_all = [list(x) for x in zip_longest(*rmse_all)]
        auc_all = [list(x) for x in zip_longest(*auc_all)]
        r2_all = [list(x) for x in zip_longest(*r2_all)]
        
        all_config_data[test_formats[format_i]] = (rmse_all, auc_all, r2_all)

    return all_config_data

In [8]:
def read_not_one_hot_default_all_results(run_config, split_rate=0.2):
    rmse_all = []
    auc_all = []
    r2_all = []
    for config_index in range(8):
        binary_index = format(config_index, '03b')
        config_arr = []

        for i in binary_index:
            i_int = int(i)
            i_bool = bool(i_int)
            config_arr.append(i_bool)

        data_config = {
            'split_rate': split_rate,
            'method': 'default',
            'has_scaffolding': config_arr[0],
            'count_no_skill_id': config_arr[1],
            'has_test_mode': config_arr[2],
            'allow_multi_skills': True,
            'one_hot': False
        }

        rnn_runner = runner()
        rnn_runner.datapreprocessor = rnn_runner.load_datapreprocessor(datapreprocessor_config)
        rnn_runner.datapreprocessor.set_config(data_config)
        rnn_runner.data_config = data_config
        log_path = rnn_runner.get_result_path(run_config=run_config)
        
        rmse, auc, r2 = read_log_results(datapath=log_path)
        
        if (None == rmse or None == auc or None == r2):
            print('not_one_hot_default_all incomplete from ', config_index)
            print()
            continue

        supposed_num_epochs = int(run_config.get('num_epochs', 30))
        test_epoch_length_alright = (len(rmse[0]) 
                                     == len(auc[0]) 
                                     == len(r2[0]) 
                                     == (supposed_num_epochs / 5))
        train_epoch_length_alright = (len(rmse[1]) 
                                     == len(auc[1]) 
                                     == len(r2[1]) 
                                     == supposed_num_epochs)
        
        if (not test_epoch_length_alright):
            print('test_epoch_length error; printing log_path')
            print(log_path)
            #print('len(rmse[0]): ', len(rmse[0]))
            #print('len(auc[0]): ', len(auc[0]))
            #print('len(r2[0]): ', len(r2[0]))
            #print('supposed_num_epochs / 5: ', supposed_num_epochs / 5)
            print('not_one_hot_default_all incomplete from ', config_index)
            print()
            continue
        
        if (not train_epoch_length_alright):
            print('train_epoch_length error; printing log_path')
            print(log_path)
            #print('len(rmse[1]): ', len(rmse[1]))
            #print('len(auc[1]): ', len(auc[1]))
            #print('len(r2[1]): ', len(r2[1]))
            #print('supposed_num_epochs: ', supposed_num_epochs)
            print('not_one_hot_default_all incomplete from ', config_index)
            print()
            continue

        rmse_all.append(rmse)
        auc_all.append(auc)
        r2_all.append(r2)
        
    rmse_all = [list(x) for x in zip_longest(*rmse_all)]
    auc_all = [list(x) for x in zip_longest(*auc_all)]
    r2_all = [list(x) for x in zip_longest(*r2_all)]

    return rmse_all, auc_all, r2_all

In [9]:
def read_not_one_hot_sliding_window_all_results(run_config, split_rate=0.2):
    all_config_data = {}
    
    test_formats = ['overlapping_last_element', 'same_as_training', 'partition', 'default']
    
    for format_i in range(len(test_formats)):
        rmse_all = []
        auc_all = []
        r2_all = []
        for config_index in range(8):
            binary_index = format(config_index, '03b')
            config_arr = []

            for i in binary_index:
                i_int = int(i)
                i_bool = bool(i_int)
                config_arr.append(i_bool)

            data_config = {
                'split_rate': split_rate,
                'method': 'sliding_window',
                'has_scaffolding': config_arr[0],
                'count_no_skill_id': config_arr[1],
                'has_test_mode': config_arr[2],
                'allow_multi_skills': True,
                'window_length': 10,
                'test_format': test_formats[format_i],
                'one_hot': False
            }

            rnn_runner = runner()
            rnn_runner.datapreprocessor = rnn_runner.load_datapreprocessor(datapreprocessor_config)
            rnn_runner.datapreprocessor.set_config(data_config)
            rnn_runner.data_config = data_config
            log_path = rnn_runner.get_result_path(run_config=run_config)
            
            rmse, auc, r2 = read_log_results(datapath=log_path)
            if (None == rmse or None== auc or None == r2):
                print('not_one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue
            
            supposed_num_epochs = int(run_config.get('num_epochs', 30))
            test_epoch_length_alright = (len(rmse[0]) 
                                         == len(auc[0]) 
                                         == len(r2[0]) 
                                         == (supposed_num_epochs / 5))
            train_epoch_length_alright = (len(rmse[1]) 
                                         == len(auc[1]) 
                                         == len(r2[1]) 
                                         == supposed_num_epochs)

            if (not test_epoch_length_alright):
                print('test_epoch_length error; printing log_path')
                print(log_path)
                #print('len(rmse[0]): ', len(rmse[0]))
                #print('len(auc[0]): ', len(auc[0]))
                #print('len(r2[0]): ', len(r2[0]))
                #print('supposed_num_epochs / 5: ', supposed_num_epochs / 5)
                print('not_one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue

            if (not train_epoch_length_alright):
                print('train_epoch_length error; printing log_path')
                print(log_path)
                #print('len(rmse[1]): ', len(rmse[1]))
                #print('len(auc[1]): ', len(auc[1]))
                #print('len(r2[1]): ', len(r2[1]))
                #print('supposed_num_epochs: ', supposed_num_epochs)
                print('not_one_hot_sw_all with test format ', test_formats[format_i], ' incomplete from ', config_index)
                print()
                continue

            rmse_all.append(rmse)
            auc_all.append(auc)
            r2_all.append(r2)
        
        rmse_all = [list(x) for x in zip_longest(*rmse_all)]
        auc_all = [list(x) for x in zip_longest(*auc_all)]
        r2_all = [list(x) for x in zip_longest(*r2_all)]
        
        all_config_data[test_formats[format_i]] = (rmse_all, auc_all, r2_all)

    return all_config_data

In [10]:
def logs_to_df(run_config, split_rate=0.2):
    
    rmse = []
    auc = []
    r2 = []
    results = []
    
    results.append(read_one_hot_default_all_results(run_config, split_rate))
    
    one_hot_sliding_window_all_dict = read_one_hot_sliding_window_all_results(run_config, split_rate)
    results.append(one_hot_sliding_window_all_dict.get('overlapping_last_element'))
    results.append(one_hot_sliding_window_all_dict.get('partition'))
    results.append(one_hot_sliding_window_all_dict.get('default'))
    results.append(one_hot_sliding_window_all_dict.get('same_as_training'))
    
    results.append(read_not_one_hot_default_all_results(run_config, split_rate))
    
    not_one_hot_sliding_window_all_dict = read_not_one_hot_sliding_window_all_results(run_config, split_rate)
    results.append(not_one_hot_sliding_window_all_dict.get('overlapping_last_element'))
    results.append(not_one_hot_sliding_window_all_dict.get('partition'))
    results.append(not_one_hot_sliding_window_all_dict.get('default'))
    results.append(not_one_hot_sliding_window_all_dict.get('same_as_training'))
    
    
    index=['one_hot_default', 
           'one_hot_sw_overlap', 
           'one_hot_sw_partition', 
           'one_hot_sw_default', 
           'one_hot_sw_same_as_training',
           'not_one_hot_default', 
           'not_one_hot_sw_overlap', 
           'not_one_hot_sw_partition', 
           'not_one_hot_sw_default', 
           'not_one_hot_sw_same_as_training']
    
    test_df = pd.DataFrame(columns=['min_rmse', 
                                    'max_r2', 
                                    'max_auc',
                                    'max_auc_config_code',
                                    'max_auc_epoch', 
                                    'train_auc-max_auc', 
                                    'train_rmse', 
                                    'train_r2'], 
                           dtype=np.float32, index=index)
    
    train_df = pd.DataFrame(columns=['min_rmse', 
                                     'max_r2', 
                                     'max_auc',
                                     'max_auc_config_code',
                                     'max_auc_epoch'], 
                            dtype=np.float32, index=index)
    
    for i in range(len(index)):
        rmse_all = results[i][0]
        auc_all = results[i][1]
        r2_all = results[i][2]
        
        test_rmse_all = rmse_all[:][0]
        train_rmse_all = rmse_all[:][1]
        
        test_auc_all = auc_all[0]
        train_auc_all = auc_all[1]
        
        test_r2_all = r2_all[0]
        train_r2_all = r2_all[1]
        
        config_length = len(rmse_all[0])
        
        test_min_rmse = min(min(test_rmse_all))
        train_min_rmse = min(min(train_rmse_all))
        
        test_max_r2 = max(max(test_r2_all))
        train_max_r2 = max(max(train_r2_all))
        
        test_max_auc = max(max(test_auc_all))
        train_max_auc = max(max(train_auc_all))
        
        test_max_auc_epoch_i = index2d(test_auc_all, test_max_auc)
        train_max_auc_epoch_i = index2d(train_auc_all, train_max_auc)
        
        if (i < 5):
            test_max_auc_data_config_code = format(test_max_auc_epoch_i[0], '04b')
            train_max_auc_data_config_code = format(train_max_auc_epoch_i[0], '04b')
        else:
            test_max_auc_data_config_code = format(test_max_auc_epoch_i[0], '03b') + '1'
            train_max_auc_data_config_code = format(train_max_auc_epoch_i[0], '03b') + '1'
        
        test_max_auc_epoch = int((test_max_auc_epoch_i[1] + 1) * 5)
        train_max_auc_epoch = int(train_max_auc_epoch_i[1] + 1)
        
        train_auc_diff = (train_auc_all[test_max_auc_epoch_i[0]][test_max_auc_epoch - 1]
                          - test_max_auc)
        
        train_rmse = train_rmse_all[test_max_auc_epoch_i[0]][test_max_auc_epoch - 1]
        train_r2 = train_r2_all[test_max_auc_epoch_i[0]][test_max_auc_epoch - 1]
        
        test_df.loc[index[i]] = ([test_min_rmse, 
                                  test_max_r2, 
                                  test_max_auc,
                                  test_max_auc_data_config_code,
                                  test_max_auc_epoch, 
                                  train_auc_diff, 
                                  train_rmse, 
                                  train_r2])
        train_df.loc[index[i]] = ([train_min_rmse, 
                                   train_max_r2,
                                   train_max_auc,
                                   train_max_auc_data_config_code,
                                   train_max_auc_epoch])
    
    test_df.loc[:, 'max_auc_epoch'] = test_df.loc[:, 'max_auc_epoch'].astype('int')
    train_df.loc[:, 'max_auc_epoch'] = train_df.loc[:, 'max_auc_epoch'].astype('int')
    
    return test_df, train_df

In [11]:
results = {}

for split_rate in [0.1, 0.2]:
    for shuffle_every_epoch in [True, False]:
        print('split_rate: ', split_rate)
        print('shuffle_every_epoch: ', shuffle_every_epoch)
        print()
        
        # make run config
        run_config = {
            'shuffle_every_epoch': shuffle_every_epoch,
            'num_epochs': 30,
            'batch_size': 100,
            'init_scale': 0.05
        }

        test_df, train_df = logs_to_df(run_config=run_config, split_rate=split_rate)

        result = {
            'test': test_df,
            'train': train_df
        }

        key = 'split_' + str(int(split_rate * 100))
        if (shuffle_every_epoch):
            key += '_shuffle_every_epoch'
        else:
            key += '_shuffle_once'

        results[key] = result


split_rate:  0.1
shuffle_every_epoch:  True

split_rate:  0.1
shuffle_every_epoch:  False

split_rate:  0.2
shuffle_every_epoch:  True

split_rate:  0.2
shuffle_every_epoch:  False


In [12]:
split_10_shuffle_test = results['split_10_shuffle_every_epoch']['test']
split_10_shuffle_train = results['split_10_shuffle_every_epoch']['train']

split_10_no_shuffle_test = results['split_10_shuffle_once']['test']
split_10_no_shuffle_train = results['split_10_shuffle_once']['train']

split_20_shuffle_test = results['split_20_shuffle_every_epoch']['test']
split_20_shuffle_train = results['split_20_shuffle_every_epoch']['train']

split_20_no_shuffle_test = results['split_20_shuffle_once']['test']
split_20_no_shuffle_train = results['split_20_shuffle_once']['train']

In [13]:
default_indices = ['one_hot_default', 'not_one_hot_default']
sliding_window_incides = (['one_hot_sw_overlap', 
                           'one_hot_sw_partition', 
                           'one_hot_sw_default', 
                           'one_hot_sw_same_as_training', 
                           'not_one_hot_sw_overlap', 
                           'not_one_hot_sw_partition', 
                           'not_one_hot_sw_default', 
                           'not_one_hot_sw_same_as_training'])
important_cols = ['min_rmse', 'max_r2', 'max_auc', 'max_auc_config_code']

In [14]:
print(split_10_no_shuffle_test.loc[default_indices, important_cols])


                     min_rmse  max_r2  max_auc max_auc_config_code
one_hot_default         0.383   0.442    0.844                0111
not_one_hot_default     0.419   0.442    0.772                1111

In [15]:
print(split_10_shuffle_test.loc[default_indices, important_cols])


                     min_rmse  max_r2  max_auc max_auc_config_code
one_hot_default         0.383   0.443    0.844                0111
not_one_hot_default     0.419   0.445    0.769                1101

In [16]:
print(split_20_no_shuffle_test.loc[default_indices, important_cols])


                     min_rmse  max_r2  max_auc max_auc_config_code
one_hot_default         0.388   0.440    0.836                0001
not_one_hot_default     0.427   0.448    0.760                1101

In [17]:
print(split_20_shuffle_test.loc[default_indices, important_cols])


                     min_rmse  max_r2  max_auc max_auc_config_code
one_hot_default         0.387   0.439    0.837                0001
not_one_hot_default     0.426   0.440    0.763                1101

In [18]:
print(split_10_no_shuffle_test.loc[sliding_window_incides, important_cols])


                                 min_rmse  max_r2  max_auc max_auc_config_code
one_hot_sw_overlap                  0.398   0.458    0.794                0101
one_hot_sw_partition                0.396   0.461    0.797                0011
one_hot_sw_default                  0.393   0.478    0.801                0001
one_hot_sw_same_as_training         0.397   0.461    0.814                1001
not_one_hot_sw_overlap              0.439   0.464    0.708                1111
not_one_hot_sw_partition            0.437   0.464    0.710                1101
not_one_hot_sw_default              0.435   0.476    0.716                1111
not_one_hot_sw_same_as_training     0.436   0.462    0.739                1101

In [19]:
print(split_10_shuffle_test.loc[sliding_window_incides, important_cols])


                                 min_rmse  max_r2  max_auc max_auc_config_code
one_hot_sw_overlap                  0.396   0.459    0.793                0011
one_hot_sw_partition                0.398   0.459    0.797                0111
one_hot_sw_default                  0.395   0.477    0.796                1111
one_hot_sw_same_as_training         0.397   0.464    0.814                1001
not_one_hot_sw_overlap              0.438   0.462    0.704                1101
not_one_hot_sw_partition            0.439   0.463    0.704                1111
not_one_hot_sw_default              0.434   0.476    0.720                1101
not_one_hot_sw_same_as_training     0.436   0.461    0.740                1101

In [20]:
print(split_20_no_shuffle_test.loc[sliding_window_incides, important_cols])


                                 min_rmse  max_r2  max_auc max_auc_config_code
one_hot_sw_overlap                  0.402   0.475    0.796                0011
one_hot_sw_partition                0.400   0.467    0.799                0011
one_hot_sw_default                  0.401   0.491    0.799                0001
one_hot_sw_same_as_training         0.398   0.461    0.811                0011
not_one_hot_sw_overlap              0.448   0.476    0.693                1101
not_one_hot_sw_partition            0.448   0.475    0.694                1111
not_one_hot_sw_default              0.448   0.492    0.698                1101
not_one_hot_sw_same_as_training     0.436   0.461    0.732                1001

In [21]:
print(split_20_shuffle_test.loc[sliding_window_incides, important_cols])


                                 min_rmse  max_r2  max_auc max_auc_config_code
one_hot_sw_overlap                  0.400   0.474    0.798                0001
one_hot_sw_partition                0.399   0.475    0.798                0001
one_hot_sw_default                  0.401   0.489    0.799                0001
one_hot_sw_same_as_training         0.396   0.462    0.811                0011
not_one_hot_sw_overlap              0.448   0.475    0.695                1101
not_one_hot_sw_partition            0.447   0.476    0.693                1111
not_one_hot_sw_default              0.446   0.490    0.704                1101
not_one_hot_sw_same_as_training     0.436   0.460    0.730                1101