In [ ]:
# Introductory text for the evaluation tables. The two `{}` placeholders
# are filled with `min_score` and `max_score`.
markdown_str = (
    "The tables in this section show the standard association metrics "
    "between *observed* human scores and different types of machine scores. "
    "These results are computed on the evaluation set. "
    "`raw_trim` scores are truncated to [{}, {}]. "
    "`raw_trim_round` scores are computed by first truncating and then "
    "rounding the predicted score. "
    "Scaled scores are computed by re-scaling the predicted scores using "
    "mean and standard deviation of human scores as observed on the "
    "training data and mean and standard deviation of machine scores as "
    "predicted for the training set."
)
markdown_str = markdown_str.format(min_score, max_score)
display(Markdown(markdown_str))
In [ ]:
# Label describing which kind of predictions the report text refers to.
raw_or_scaled = "scaled" if use_scaled_predictions else "raw"

# Read the evaluation results written for this experiment.
eval_file = join(output_dir, '{}_eval.{}'.format(experiment_id, file_format))
df_eval = DataReader.read_from_file(eval_file, index_col=0)

# Columns that go into the distribution table; every remaining column of
# `df_eval` goes into the association table, which also repeats 'N'.
distribution_columns = ['N', 'h_mean', 'sys_mean', 'h_sd', 'sys_sd',
                        'h_min', 'sys_min', 'h_max', 'sys_max', 'SMD']
association_columns = ['N'] + [column for column in df_eval.columns
                               if column not in distribution_columns]

df_distribution = df_eval[distribution_columns]
df_association = df_eval[association_columns]
In [ ]:
# Set the pandas display width before rendering the table.
pd.options.display.width = 10

# Formatter for the 'SMD' column: `color_highlighter` (defined elsewhere)
# is called with thresholds low=-0.15 and high=0.15.
formatter = partial(color_highlighter, low=-0.15, high=0.15)

# Render the distribution table as HTML wrapped in a slightly smaller font;
# this expression is the cell's output.
HTML(''.join(['<span style="font-size:95%">',
              df_distribution.to_html(classes=['sortable'],
                                      escape=False,
                                      formatters={'SMD': formatter},
                                      float_format=float_format_func),
              '</span>']))
In [ ]:
# Lead sentence followed by a note about rounding for `kappa`; which note
# is used depends on `continuous_human_score`.
markdown_str = [
    'The table shows the standard association metrics between human scores and machine scores.',
    ("Note that for computation of `kappa` both human and machine scores are rounded."
     if continuous_human_score else
     "Note that for computation of `kappa` all machine scores are rounded."),
]
Markdown('\n'.join(markdown_str))
In [ ]:
# Set the pandas display width before rendering the table.
pd.options.display.width = 10

# Render the association table as HTML wrapped in a slightly smaller font;
# this expression is the cell's output.
HTML(''.join(['<span style="font-size:95%">',
              df_association.to_html(classes=['sortable'],
                                     escape=False,
                                     float_format=float_format_func),
              '</span>']))
In [ ]:
# Caption for the confusion matrix; `raw_or_scaled` names the kind of
# machine scores used.
markdown_str = ["Confusion matrix using {}, trimmed, and rounded scores and human scores (rows=system, columns=human).".format(raw_or_scaled)]

# Only mention rounding of human scores when they are continuous.
if continuous_human_score:
    markdown_str.append("Note: Human scores have been rounded to the nearest integer.")

Markdown('\n'.join(markdown_str))
In [ ]:
# Read the confusion matrix file for this experiment and show the
# resulting object as the cell's output.
confmat_file = join(
    output_dir,
    '{}_confMatrix.{}'.format(experiment_id, file_format),
)
df_confmat = DataReader.read_from_file(confmat_file, index_col=0)
df_confmat
In [ ]:
# Description of the score distribution histogram and table;
# `raw_or_scaled` names the kind of machine scores used.
markdown_strs = ["The histogram and the table below show the distribution of "
                 "human scores and {}, trimmed, and rounded machine scores "
                 "(as % of all responses).".format(raw_or_scaled)]
markdown_strs.append("Differences in the table between human and machine distributions "
                     "larger than 5 percentage points are <span class='highlight_color'>highlighted</span>.")

# Only mention rounding of human scores when they are continuous.
if continuous_human_score:
    markdown_strs.append("Note: Human scores have been rounded to the nearest integer.")

display(Markdown('\n'.join(markdown_strs)))
In [ ]:
# Read the score distribution table and reshape it to long format
# (one row per score/variable pair) for plotting.
scoredist_file = join(output_dir, '{}_score_dist.{}'.format(experiment_id, file_format))
df_scoredist = DataReader.read_from_file(scoredist_file, index_col=0)
df_scoredist_melted = pd.melt(df_scoredist, id_vars=['score'])
# drop the 'difference' rows so that they are not plotted
df_scoredist_melted = df_scoredist_melted[df_scoredist_melted['variable'] != 'difference']

# get the colors for the plot
colors = sns.color_palette("Greys", 2)

with sns.axes_style('whitegrid'):

    # make a barplot without a legend since we will
    # add one manually later; x/y/hue are passed by keyword
    # because newer seaborn releases no longer accept them positionally
    p = sns.catplot(x="score", y="value", hue="variable", kind="bar",
                    palette=colors, data=df_scoredist_melted,
                    height=3, aspect=2, legend=False)
    p.set_axis_labels('score', '% of responses')

    # add a legend with the right colors
    axis = p.axes[0][0]
    legend = axis.legend(labels=('Human', 'Machine'), title='', frameon=True, fancybox=True)

    # `legendHandles` was renamed to `legend_handles` in newer matplotlib
    # releases; prefer the new name and fall back to the old one
    legend_handles = getattr(legend, 'legend_handles', None)
    if legend_handles is None:
        legend_handles = legend.legendHandles
    legend_handles[0].set_color(colors[0])
    legend_handles[1].set_color(colors[1])

    # save the figure and then show either a thumbnail or the full plot
    imgfile = join(figure_dir, '{}_score_dist.svg'.format(experiment_id))
    plt.savefig(imgfile)
    if use_thumbnails:
        show_thumbnail(imgfile, next(id_generator))
    else:
        plt.show()
In [ ]:
# Formatter for the 'difference' column: `color_highlighter` (defined
# elsewhere) is called with low=0, high=5 and absolute=True.
formatter = partial(color_highlighter, low=0, high=5, absolute=True)

# Render the score distribution table without its index and display it.
df_html = df_scoredist.to_html(
    classes=['sortable'],
    index=False,
    escape=False,
    formatters={'difference': formatter},
)
display(HTML(df_html))