In [ ]:
def summarize_feature_correlations(model_list, file_suffix, header, file_format_summarize):
    """Display and save one feature-correlation table combined across models.

    For each entry of ``model_list`` the file
    ``<csvdir>/<model_id>_<file_suffix>.<file_format>`` is looked up. Entries
    whose file does not exist are skipped. Every file that is found is read
    with ``DataReader.read_from_file``, its index is replaced by the model
    name, and the frames are concatenated into a single table. That table is
    displayed as HTML below ``header`` and written out through ``DataWriter``.

    If no correlation file exists for any model, nothing is displayed or
    written.

    Besides its arguments, the function reads ``summary_id``, ``output_dir``
    and ``int_or_float_format_func`` from the enclosing (notebook) scope.

    Parameters
    ----------
    model_list : iterable of 5-tuples
        ``(model_id, model_name, config, csvdir, file_format)`` per model.
        ``config`` is not used here.
    file_suffix : str
        Suffix of the per-model correlation file; also used as the key (name)
        of the summary table handed to the writer.
    header : displayable object
        Passed to ``display`` immediately before the table.
    file_format_summarize : str
        Forwarded as ``file_format`` to ``write_experiment_output``.
    """
    corrs = []
    for (model_id, model_name, _config, csvdir, file_format) in model_list:
        corr_file = os.path.join(csvdir, '{}_{}.{}'.format(model_id, file_suffix, file_format))
        if os.path.exists(corr_file):
            model_corrs = DataReader.read_from_file(corr_file, index_col=0)
            # A one-element index: each file is presumably a single row of
            # correlations (pandas rejects the assignment otherwise).
            model_corrs.index = [model_name]
            corrs.append(model_corrs)

    # None of the models produced this table: skip display and output.
    if not corrs:
        return

    # sort=True aligns the columns when the frames do not share one layout.
    df_summ = pd.concat(corrs, sort=True)

    display(header)
    display(HTML(df_summ.to_html(index=True,
                                 classes=['sortable'],
                                 escape=False,
                                 float_format=int_or_float_format_func)))

    writer = DataWriter(summary_id)
    writer.write_experiment_output(output_dir,
                                   {file_suffix: df_summ},
                                   index=True,
                                   file_format=file_format_summarize)
In [ ]:
# Marginal correlations: build the section header, then display and save the
# 'margcor_score_all_data' table combined over every model in model_list.
header = Markdown("#### Marginal correlations against score\n\n\n "
                  "The table shows marginal correlations between each feature "
                  "and the human score.")
summarize_feature_correlations(model_list, 'margcor_score_all_data', header, file_format_summarize)
In [ ]:
# Partial correlations (all other features controlled): build the section
# header, then display and save the combined 'pcor_score_all_data' table.
header = Markdown("#### Partial correlations after controlling for all other variables\n\n\n "
                  "This table shows Pearson's correlation between each feature and human score after "
                  "controlling for all other features")
summarize_feature_correlations(model_list, 'pcor_score_all_data', header, file_format_summarize)
In [ ]:
# Partial correlations (length controlled): build the section header, then
# display and save the combined 'pcor_score_no_length_all_data' table.
header = Markdown("#### Partial correlations after controlling for length\n\n\n "
                  "This table shows Pearson's correlation between each feature and human score after "
                  "controlling for length")
summarize_feature_correlations(model_list, 'pcor_score_no_length_all_data', header, file_format_summarize)