In [ ]:
# Setting options for the plots
%matplotlib inline
%config InlineBackend.figure_formats={'retina', 'svg'}
%config InlineBackend.rc={'savefig.dpi': 150}
In [ ]:
import argparse
import base64
import itertools
import json
import logging
import numpy as np
import os
import platform
import pandas as pd
import re
import sys
import time
from functools import partial
from os.path import abspath, exists, join
from IPython import sys_info
from IPython.display import display, HTML, Image, Javascript, Markdown, SVG
from rsmtool.comparer import Comparer
from rsmtool.reader import DataReader
from rsmtool.writer import DataWriter
from rsmtool.utils.files import parse_json_with_comments
from rsmtool.utils.notebook import (float_format_func,
                                    int_or_float_format_func,
                                    bold_highlighter,
                                    color_highlighter,
                                    show_thumbnail)
from rsmtool.version import VERSION as rsmtool_version
In [ ]:
rsm_report_dir = os.environ.get('RSM_REPORT_DIR', None)
if rsm_report_dir is None:
    rsm_report_dir = os.getcwd()
rsm_environ_config = join(rsm_report_dir, '.environ.json')
if not exists(rsm_environ_config):
    raise FileNotFoundError('The file {} cannot be located. '
                            'Please make sure that either (1) '
                            'you have set the correct directory with the `RSM_REPORT_DIR` '
                            'environment variable, or (2) that your `.environ.json` '
                            'file is in the same directory as your notebook.'.format(rsm_environ_config))
environ_config = parse_json_with_comments(rsm_environ_config)
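In [ ]:
# Illustrative sketch only (not part of the generated report): the keys this
# notebook expects to find in `.environ.json`, shown with made-up placeholder
# values. When using the notebook interactively, a config along these lines
# needs to live in `RSM_REPORT_DIR` (or next to the notebook).
example_environ_config = {
    "EXPERIMENT_ID_OLD": "old_experiment",          # hypothetical ID
    "DESCRIPTION_OLD": "baseline model",
    "OUTPUT_DIR_OLD": "/path/to/old/output",
    "FIGURE_DIR_OLD": "/path/to/old/figure",
    "SCALED_OLD": True,
    "EXPERIMENT_ID_NEW": "new_experiment",          # hypothetical ID
    "DESCRIPTION_NEW": "candidate model",
    "OUTPUT_DIR_NEW": "/path/to/new/output",
    "FIGURE_DIR_NEW": "/path/to/new/figure",
    "SCALED_NEW": True,
    "USE_THUMBNAILS": False,
    "JAVASCRIPT_PATH": "/path/to/rsmtool/notebooks/javascript",
    "GROUPS_FOR_DESCRIPTIVES": ["L1"],              # hypothetical subgroup
    "GROUPS_FOR_EVALUATIONS": ["L1"],
}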
In [ ]:
# NOTE: you will need to set the following manually
# if you are using this notebook interactively.
experiment_id_old = environ_config.get('EXPERIMENT_ID_OLD')
description_old = environ_config.get('DESCRIPTION_OLD')
output_dir_old = environ_config.get('OUTPUT_DIR_OLD')
figure_dir_old = environ_config.get('FIGURE_DIR_OLD')
scaled_old = environ_config.get('SCALED_OLD')
score_prefix_old = 'scale' if scaled_old else 'raw'
use_thumbnails = environ_config.get('USE_THUMBNAILS')
experiment_id_new = environ_config.get('EXPERIMENT_ID_NEW')
description_new = environ_config.get('DESCRIPTION_NEW')
output_dir_new = environ_config.get('OUTPUT_DIR_NEW')
figure_dir_new = environ_config.get('FIGURE_DIR_NEW')
scaled_new = environ_config.get('SCALED_NEW')
score_prefix_new = 'scale' if scaled_new else 'raw'
# javascript path
javascript_path = environ_config.get("JAVASCRIPT_PATH")
# groups for subgroup analysis.
groups_desc = environ_config.get('GROUPS_FOR_DESCRIPTIVES')
groups_eval = environ_config.get('GROUPS_FOR_EVALUATIONS')
if len(groups_desc) == 1 and groups_desc[0] == '':
    groups_desc = []
if len(groups_eval) == 1 and groups_eval[0] == '':
    groups_eval = []
In [ ]:
# initialize id generator for thumbnails
id_generator = itertools.count(1)
In [ ]:
with open(join(javascript_path, "sort.js"), "r", encoding="utf-8") as sortf:
    display(Javascript(data=sortf.read()))
In [ ]:
markdown_strs = ['This report presents a comparison of the following two experiments']
markdown_strs.append('')
markdown_strs.append(' Old Experiment ID: **{}**'.format(experiment_id_old))
markdown_strs.append('')
markdown_strs.append(' Description: {}'.format(description_old))
markdown_strs.append('')
markdown_strs.append('')
markdown_strs.append(' New Experiment ID: **{}**'.format(experiment_id_new))
markdown_strs.append('')
markdown_strs.append(' Description: {}'.format(description_new))
markdown_strs.append('')
Markdown('\n'.join(markdown_strs))
In [ ]:
if use_thumbnails:
    display(Markdown("""***Note: Images in this report have been converted to """
                     """clickable thumbnails***"""))
In [ ]:
HTML(time.strftime('%c'))
In [ ]:
%%html
<div id="toc"></div>
In [ ]:
# load the two sets of RSMTool outputs
comparer = Comparer()
(outputs_old, figures_old,
 file_format_old) = comparer.load_rsmtool_output(output_dir_old,
                                                 figure_dir_old,
                                                 experiment_id_old,
                                                 prefix=score_prefix_old,
                                                 groups_eval=groups_eval)

(outputs_new, figures_new,
 file_format_new) = comparer.load_rsmtool_output(output_dir_new,
                                                 figure_dir_new,
                                                 experiment_id_new,
                                                 prefix=score_prefix_new,
                                                 groups_eval=groups_eval)
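In [ ]:
# Optional sanity check when running interactively: `load_rsmtool_output()`
# returns a dictionary of data frames keyed by table name (e.g. 'df_eval',
# 'df_descriptives'); listing the keys shows what is available for comparison.
sorted(outputs_old.keys())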
In [ ]:
def combine_old_new_results(df_old, df_new, name):
    # if both data frames are empty, return an empty data frame
    if df_old.empty and df_new.empty:
        return pd.DataFrame()
    # if only one data frame is empty, keep the one that exists and
    # substitute '-' for the missing data
    elif df_old.empty:
        df_old = pd.DataFrame(columns=df_new.columns, index=df_new.index, data='-')
        df_diff = pd.DataFrame(columns=df_new.columns, index=df_new.index, data='-')
    elif df_new.empty:
        df_new = pd.DataFrame(columns=df_old.columns, index=df_old.index, data='-')
        df_diff = pd.DataFrame(columns=df_old.columns, index=df_old.index, data='-')
    else:
        # both data frames exist, so compute the difference between them
        df_diff = df_new - df_old

        # if the data frame pertains to features or scores, then add a fake row
        # to the old data frame for each feature that was added and, conversely,
        # to the new data frame for each feature that was removed
        if name in ['descriptives',
                    'outliers',
                    'percentiles',
                    'coefs',
                    'feature_cors',
                    'eval_by_prompt_overview',
                    'pcor_sc1_overview',
                    'mcor_sc1_overview',
                    'pcor_log_dta_dtu_overview',
                    'score_dist']:
            added_features_or_scores = list(set(df_new.index).difference(df_old.index))
            removed_features_or_scores = list(set(df_old.index).difference(df_new.index))
            for afs in added_features_or_scores:
                df_old.loc[afs] = '-'
            for rfs in removed_features_or_scores:
                df_new.loc[rfs] = '-'

    # label each data frame and stack them; note that `DataFrame.append()`
    # was removed in pandas 2.0, so use `pd.concat()` instead
    df_old['version'] = 'old'
    df_new['version'] = 'new'
    df_diff['version'] = 'change'

    tmp_df = pd.concat([df_old, df_new, df_diff], sort=True)
    tmp_df.index.name = 'for'
    tmp_df = (tmp_df.reset_index()
                    .sort_values(by=['for', 'version'], ascending=[True, False])
                    .set_index('for'))
    tmp_df.index.name = None

    # put the 'version' column first
    tmp_df = tmp_df[['version'] + [x for x in tmp_df.columns if x != 'version']]
    return tmp_df
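In [ ]:
# Illustrative sketch (not part of the generated report): how
# `combine_old_new_results()` stacks the 'old', 'new', and 'change' rows for a
# feature-level table. The feature names and values below are made up.
_df_old_demo = pd.DataFrame({'mean': [3.0, 2.0], 'std. dev.': [1.0, 0.5]},
                            index=['FEATURE_A', 'FEATURE_B'])
_df_new_demo = pd.DataFrame({'mean': [3.5, 2.0], 'std. dev.': [1.1, 0.5]},
                            index=['FEATURE_A', 'FEATURE_B'])
combine_old_new_results(_df_old_demo, _df_new_demo, 'descriptives')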
In [ ]:
out_dfs = {}
name_old_new = [('descriptives', outputs_old['df_descriptives'], outputs_new['df_descriptives']),
                ('outliers', outputs_old['df_outliers'], outputs_new['df_outliers']),
                ('feature_cors', outputs_old['df_feature_cors'], outputs_new['df_feature_cors']),
                ('percentiles', outputs_old['df_percentiles'], outputs_new['df_percentiles']),
                ('eval_overview', outputs_old['df_eval'], outputs_new['df_eval']),
                ('mcor_sc1', outputs_old['df_mcor_sc1'], outputs_new['df_mcor_sc1']),
                ('mcor_sc1_overview', outputs_old['df_mcor_sc1_overview'], outputs_new['df_mcor_sc1_overview']),
                ('pcor_sc1', outputs_old['df_pcor_sc1'], outputs_new['df_pcor_sc1']),
                ('pcor_sc1_overview', outputs_old['df_pcor_sc1_overview'], outputs_new['df_pcor_sc1_overview']),
                ('score_dist', outputs_old['df_score_dist'], outputs_new['df_score_dist']),
                ('consistency', outputs_old['df_consistency'], outputs_new['df_consistency']),
                ('disattenuated_correlations', outputs_old['df_disattenuated_correlations'], outputs_new['df_disattenuated_correlations']),
                ('true_score_evaluations', outputs_old['df_true_score_eval'], outputs_new['df_true_score_eval'])]

# add the subgroup differences
for group in groups_eval:
    name_old_new.extend([('eval_by_{}'.format(group),
                          outputs_old['df_eval_by_{}'.format(group)],
                          outputs_new['df_eval_by_{}'.format(group)]),
                         ('eval_by_{}_m_sd'.format(group),
                          outputs_old['df_eval_by_{}_m_sd'.format(group)],
                          outputs_new['df_eval_by_{}_m_sd'.format(group)]),
                         ('eval_by_{}_overview'.format(group),
                          outputs_old['df_eval_by_{}_overview'.format(group)],
                          outputs_new['df_eval_by_{}_overview'.format(group)]),
                         ('mcor_sc1_by_{}'.format(group),
                          outputs_old['df_mcor_sc1_by_{}'.format(group)],
                          outputs_new['df_mcor_sc1_by_{}'.format(group)]),
                         ('pcor_sc1_by_{}'.format(group),
                          outputs_old['df_pcor_sc1_by_{}'.format(group)],
                          outputs_new['df_pcor_sc1_by_{}'.format(group)]),
                         ('disattenuated_correlations_by_{}'.format(group),
                          outputs_old['df_disattenuated_correlations_by_{}'.format(group)],
                          outputs_new['df_disattenuated_correlations_by_{}'.format(group)]),
                         ('disattenuated_correlations_by_{}_overview'.format(group),
                          outputs_old['df_disattenuated_correlations_by_{}_overview'.format(group)],
                          outputs_new['df_disattenuated_correlations_by_{}_overview'.format(group)])])

# combine the old and new data and compute the difference
for name, df_old, df_new in name_old_new:
    out_dfs[name] = combine_old_new_results(df_old, df_new, name)
# define the message for missing information
no_info_str = 'This information is not available for either of the models.'
no_plot_old_str = 'This figure is not available for the old model.'
no_plot_new_str = 'This figure is not available for the new model.'
# WARN IF THE OLD AND NEW DATASETS ARE NOT THE SAME SIZE
log_msgs = []
# first check the training set
if not (outputs_old['df_descriptives'].empty or outputs_new['df_descriptives'].empty):
    # take the N from the descriptive stats for the first feature
    oldTrainN = outputs_old['df_descriptives']['N'].iloc[0]
    newTrainN = outputs_new['df_descriptives']['N'].iloc[0]
    if oldTrainN != newTrainN:
        log_msg = "WARNING: the training sets were different sizes. old N: {}, new N: {}.".format(oldTrainN, newTrainN)
        log_msgs.append(log_msg)

# then check the test set
if not (outputs_old['df_eval'].empty or outputs_new['df_eval'].empty):
    oldTestN = outputs_old['df_eval']['N']
    newTestN = outputs_new['df_eval']['N']
    if not np.all(oldTestN == newTestN):
        log_msg = "WARNING: the testing sets were different sizes. old N: {}, new N: {}.".format(oldTestN, newTestN)
        log_msgs.append(log_msg)
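In [ ]:
# A minimal sketch of surfacing the collected warnings when running this
# notebook interactively; the assembled report may render them separately.
if log_msgs:
    display(HTML('<br>'.join(log_msgs)))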