Feature descriptives


In [ ]:
def summarize_feature_correlations(model_list, file_suffix, header, file_format_summarize):
    """
    Gather the per-model correlation files, then display and save them as one table.

    For every entry in ``model_list`` the file
    ``<csvdir>/<model_id>_<file_suffix>.<file_format>`` is read if it exists;
    models without such a file are skipped. The index of each table read is
    replaced by the model name, and the tables are stacked into one data frame
    that is shown in the notebook as an HTML table and written out under the
    name ``file_suffix``. Nothing is displayed or written when no file is found.

    Note that ``summary_id`` and ``output_dir`` are not parameters: they are
    looked up in the enclosing (notebook) namespace and must already be defined
    there, as must ``int_or_float_format_func``.

    Parameters
    ----------
    model_list : iterable of 5-tuples
        Each tuple is unpacked as
        ``(model_id, model_name, config, csvdir, file_format)``.
        The ``config`` element is not used here.
    file_suffix : str
        Suffix of the per-model file name; also the key under which the
        combined table is written.
    header : object
        Displayed (via ``display``) just before the table.
    file_format_summarize : str
        Passed as ``file_format`` when writing the combined table.
    """
    per_model_tables = []
    for model_id, model_name, _config, csvdir, file_format in model_list:
        file_name = '{}_{}.{}'.format(model_id, file_suffix, file_format)
        corr_path = os.path.join(csvdir, file_name)
        if not os.path.exists(corr_path):
            continue
        table = DataReader.read_from_file(corr_path, index_col=0)
        # A one-element index: each file is expected to hold a single row,
        # which is labelled with the model name.
        table.index = [model_name]
        per_model_tables.append(table)

    # Nothing to summarize: leave the notebook output empty.
    if len(per_model_tables) == 0:
        return

    df_summ = pd.concat(per_model_tables, sort=True)

    display(header)
    table_html = df_summ.to_html(index=True,
                                 classes=['sortable'],
                                 escape=False,
                                 float_format=int_or_float_format_func)
    display(HTML(table_html))

    summary_writer = DataWriter(summary_id)
    summary_writer.write_experiment_output(output_dir,
                                           {file_suffix: df_summ},
                                           index=True,
                                           file_format=file_format_summarize)

Marginal and partial correlations

The tables below show the correlations between the truncated and standardized (if applicable) values of each feature and the human score for each model. All correlations are computed on the training sets.


In [ ]:
# Heading and caption shown above the table. The space after "####" is
# required for the line to render as a heading in CommonMark-based front ends.
header = Markdown("#### Marginal correlations against score\n\n\n "
                  "The table shows marginal correlations between each feature "
                  "and the human score.")

# Combine the per-model "<model_id>_margcor_score_all_data" files into one table.
summarize_feature_correlations(model_list, 'margcor_score_all_data', header, file_format_summarize)

In [ ]:
# Heading and caption shown above the table. The space after "####" is
# required for the line to render as a heading in CommonMark-based front ends.
header = Markdown("#### Partial correlations after controlling for all other variables\n\n\n "
                  "This table shows Pearson's correlation between each feature and human score after "
                  "controlling for all other features")

# Combine the per-model "<model_id>_pcor_score_all_data" files into one table.
summarize_feature_correlations(model_list, 'pcor_score_all_data', header, file_format_summarize)

In [ ]:
# Heading and caption shown above the table. The space after "####" is
# required for the line to render as a heading in CommonMark-based front ends.
header = Markdown("#### Partial correlations after controlling for length\n\n\n "
                  "This table shows Pearson's correlation between each feature and human score after "
                  "controlling for length")

# Combine the per-model "<model_id>_pcor_score_no_length_all_data" files into one table.
summarize_feature_correlations(model_list, 'pcor_score_no_length_all_data', header, file_format_summarize)