In [ ]:
# Render the true-score (PRMSE) evaluation section: an explanatory intro
# followed by a sortable HTML table, or a "no information" notice when the
# evaluations dataframe is empty.
if not out_dfs['true_score_evaluations'].empty:
    # Two Markdown paragraphs joined by a newline, exactly as displayed.
    intro_paragraphs = [
        "The tables in this section show how well system scores can "
        "predict *true* scores. According to Test theory, a *true* score "
        "is a score that would have been obtained if there were no errors "
        "in measurement. While true scores cannot be observed, the variance "
        "of true scores and the prediction error can be estimated using observed "
        "human scores when multiple human ratings are available for a subset of "
        "responses. In this notebook these are estimated using human scores for "
        "responses in the evaluation set.",
        "The table shows variance of human rater errors, "
        "true score variance, mean squared error (MSE) and "
        "proportional reduction in mean squared error (PRMSE) for "
        "predicting a true score with system score.",
    ]
    display(Markdown('\n'.join(intro_paragraphs)))

    pd.options.display.width = 10

    # Columns to pull from the evaluations dataframe, in display order.
    prmse_columns = ['version', 'N', 'N raters', 'N single', 'N multiple',
                     'Variance of errors', 'True score var',
                     'MSE true', 'PRMSE true']

    # Show '-' in place of missing values for readability.
    df_prmse = out_dfs['true_score_evaluations'][prmse_columns].copy()
    df_prmse = df_prmse.replace({np.nan: '-'})

    table_html = df_prmse.to_html(classes=['sortable'],
                                  escape=False,
                                  index=False,
                                  float_format=float_format_func)
    display(HTML('<span style="font-size:95%">' + table_html + '</span>'))
else:
    display(Markdown(no_info_str))