In [ ]:
# Setting options for the plots
%matplotlib inline
%config InlineBackend.figure_formats={'retina', 'svg'}
%config InlineBackend.rc={'savefig.dpi': 150}

Experiment Report


In [ ]:
import itertools
import math
import os
import re
import pickle
import platform
import time
import warnings

from functools import partial
from os.path import abspath, relpath, exists, join

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
from matplotlib import pyplot as plt
from textwrap import wrap

from IPython import sys_info
from IPython.display import display, HTML, Image, Javascript, Markdown, SVG
from rsmtool.reader import DataReader
from rsmtool.writer import DataWriter
from rsmtool.utils.files import parse_json_with_comments
from rsmtool.utils.notebook import (float_format_func,
                                    int_or_float_format_func,
                                    compute_subgroup_plot_params,
                                    bold_highlighter,
                                    color_highlighter,
                                    show_thumbnail)

from rsmtool.fairness_utils import (get_fairness_analyses,
                                    write_fairness_results)

from rsmtool.version import VERSION as rsmtool_version

sns.set_context('notebook')

In [ ]:
rsm_report_dir = os.environ.get('RSM_REPORT_DIR', None)
if rsm_report_dir is None:
    rsm_report_dir = os.getcwd()

rsm_environ_config = join(rsm_report_dir, '.environ.json')
if not exists(rsm_environ_config):
    raise FileNotFoundError('The file {} cannot be located. '
                            'Please make sure that either (1) '
                            'you have set the correct directory with the `RSM_REPORT_DIR` '
                            'environment variable, or (2) that your `.environ.json` '
                            'file is in the same directory as your notebook.'.format(rsm_environ_config))
    
environ_config = parse_json_with_comments(rsm_environ_config)

In [ ]:
# NOTE: you will need to set the following manually
# if you are using this notebook interactively.
experiment_id = environ_config.get('EXPERIMENT_ID')
description = environ_config.get('DESCRIPTION')
context = environ_config.get('CONTEXT')
train_file_location = environ_config.get('TRAIN_FILE_LOCATION')
test_file_location = environ_config.get('TEST_FILE_LOCATION')
output_dir = environ_config.get('OUTPUT_DIR')
figure_dir = environ_config.get('FIGURE_DIR')
model_name = environ_config.get('MODEL_NAME')
model_type = environ_config.get('MODEL_TYPE')
skll_fixed_parameters = environ_config.get('SKLL_FIXED_PARAMETERS')
skll_objective = environ_config.get('SKLL_OBJECTIVE')
file_format = environ_config.get('FILE_FORMAT')
length_column = environ_config.get('LENGTH_COLUMN')
second_human_score_column = environ_config.get('H2_COLUMN')
use_scaled_predictions = environ_config.get('SCALED')
min_score = environ_config.get("MIN_SCORE")
max_score = environ_config.get("MAX_SCORE")
standardize_features = environ_config.get('STANDARDIZE_FEATURES')
exclude_zero_scores = environ_config.get('EXCLUDE_ZEROS')
# default to a blank path so that the `exists()` check on this file
# further below simply fails when no feature subset file was specified
feature_subset_file = environ_config.get('FEATURE_SUBSET_FILE', ' ')
min_items = environ_config.get('MIN_ITEMS')
use_thumbnails = environ_config.get('USE_THUMBNAILS')
predict_expected_scores = environ_config.get('PREDICT_EXPECTED_SCORES')
rater_error_variance = environ_config.get("RATER_ERROR_VARIANCE")

# groups for analysis by prompt or subgroup.
groups_desc = environ_config.get('GROUPS_FOR_DESCRIPTIVES') 
groups_eval = environ_config.get('GROUPS_FOR_EVALUATIONS') 

# minimum number of responses a group must have to be displayed in the report
min_n_per_group = environ_config.get('MIN_N_PER_GROUP')

if min_n_per_group is None:
    min_n_per_group = {}

# javascript path
javascript_path = environ_config.get("JAVASCRIPT_PATH")
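
If you are running this notebook interactively, the `.environ.json` file can also be created by hand. The next cell is a minimal sketch with entirely hypothetical values; any key it omits is simply returned as `None` by the `environ_config.get()` calls above.

In [ ]:
# Sketch only: hypothetical values for interactive use of this notebook.
# Keys omitted from the file default to None via environ_config.get().
minimal_config = {
    "EXPERIMENT_ID": "example_experiment",            # hypothetical ID
    "DESCRIPTION": "interactive run of the report",   # hypothetical description
    "TRAIN_FILE_LOCATION": "train.csv",               # hypothetical paths
    "TEST_FILE_LOCATION": "test.csv",
    "OUTPUT_DIR": "output",
    "FIGURE_DIR": "figure",
    "FILE_FORMAT": "csv",
    "USE_THUMBNAILS": False,
    "PREDICT_EXPECTED_SCORES": False,
}

# To use it, write the dictionary out as JSON next to this notebook, e.g.:
# import json
# with open(".environ.json", "w", encoding="utf-8") as configf:
#     json.dump(minimal_config, configf, indent=2)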

In [ ]:
# initialize counter for thumbnail IDs
id_generator = itertools.count(1)
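
The IDs from this counter are consumed later in the report whenever a figure is rendered as a clickable thumbnail. The next cell is only an illustrative sketch (the file name is hypothetical, and the exact `show_thumbnail` signature should be checked against the rsmtool version in use); the actual report sections apply the same pattern to the plots they save.

In [ ]:
# Illustrative sketch only: display a saved figure as a clickable thumbnail
# when thumbnails are enabled. `example_figure` is a hypothetical file name,
# so the `exists()` guard keeps this cell a no-op in a normal report run.
example_figure = join(figure_dir, '{}_example_plot.svg'.format(experiment_id))
if exists(example_figure):
    if use_thumbnails:
        show_thumbnail(example_figure, next(id_generator))
    else:
        display(SVG(filename=example_figure))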

In [ ]:
with open(join(javascript_path, "sort.js"), "r", encoding="utf-8") as sortf:
    display(Javascript(data=sortf.read()))

In [ ]:
Markdown('''This report presents the analysis for **{}**: {}'''.format(experiment_id, description))

In [ ]:
markdown_str = ''
if use_thumbnails:
    markdown_str += ("""\n  - Images in this report have been converted to """
                     """clickable thumbnails.""")
if predict_expected_scores:
    markdown_str += ("""\n  - Predictions analyzed in this report are *expected scores*, """
                     """i.e., probability-weighted averages over all score points.""")

if markdown_str:
    markdown_str = '**Notes**:' + markdown_str
    display(Markdown(markdown_str))

In [ ]:
HTML(time.strftime('%c'))

In [ ]:
%%html
<div id="toc"></div>

In [ ]:
# Read in the training and testing features, both raw and pre-processed
# Make sure that the `spkitemid` and `candidate` columns are read as strings 
# to preserve any leading zeros
# We suppress DtypeWarnings, which mostly pop up for very large files

string_columns = ['spkitemid', 'candidate']
converter_dict = {column: str for column in string_columns}
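# For example (hypothetical value), without these converters an ID such as
# "00123" would be parsed as the integer 123 and its leading zeros lost.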

with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=pd.errors.DtypeWarning)
    if exists(train_file_location):
        df_train_orig = DataReader.read_from_file(train_file_location)

    train_file = join(output_dir, '{}_train_features.{}'.format(experiment_id,
                                                                file_format))
    if exists(train_file):
        df_train = DataReader.read_from_file(train_file, converters=converter_dict)

    train_metadata_file = join(output_dir, '{}_train_metadata.{}'.format(experiment_id,
                                                                         file_format))    
    if exists(train_metadata_file):
        df_train_metadata = DataReader.read_from_file(train_metadata_file, converters=converter_dict)

    train_other_columns_file = join(output_dir, '{}_train_other_columns.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(train_other_columns_file):
        df_train_other_columns = DataReader.read_from_file(train_other_columns_file, converters=converter_dict)

    train_length_file = join(output_dir, '{}_train_response_lengths.{}'.format(experiment_id,
                                                                               file_format))
    if exists(train_length_file):
        df_train_length = DataReader.read_from_file(train_length_file, converters=converter_dict)

    train_excluded_file = join(output_dir, '{}_train_excluded_responses.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(train_excluded_file):
        df_train_excluded = DataReader.read_from_file(train_excluded_file, converters=converter_dict)

    train_responses_with_excluded_flags_file = join(output_dir, '{}_train_responses_with_excluded_flags.{}'.format(experiment_id,
                                                                                                                   file_format))
    if exists(train_responses_with_excluded_flags_file):
        df_train_responses_with_excluded_flags = DataReader.read_from_file(train_responses_with_excluded_flags_file,
                                                                           converters=converter_dict)

    train_preproc_file = join(output_dir, '{}_train_preprocessed_features.{}'.format(experiment_id,
                                                                                     file_format))    
    if exists(train_preproc_file):
        df_train_preproc = DataReader.read_from_file(train_preproc_file, converters=converter_dict)

    if exists(test_file_location):
        df_test_orig = DataReader.read_from_file(test_file_location)

    test_file = join(output_dir, '{}_test_features.{}'.format(experiment_id,
                                                              file_format))
    if exists(test_file):
        df_test = DataReader.read_from_file(test_file, converters=converter_dict)

    test_metadata_file = join(output_dir, '{}_test_metadata.{}'.format(experiment_id,
                                                                       file_format))    
    if exists(test_metadata_file):
        df_test_metadata = DataReader.read_from_file(test_metadata_file, converters=converter_dict)

    test_other_columns_file = join(output_dir, '{}_test_other_columns.{}'.format(experiment_id,
                                                                                 file_format))
    if exists(test_other_columns_file):
        df_test_other_columns = DataReader.read_from_file(test_other_columns_file, converters=converter_dict)

    test_human_scores_file = join(output_dir, '{}_test_human_scores.{}'.format(experiment_id,
                                                                               file_format))
    if exists(test_human_scores_file):
        df_test_human_scores = DataReader.read_from_file(test_human_scores_file, converters=converter_dict)

    test_excluded_file = join(output_dir, '{}_test_excluded_responses.{}'.format(experiment_id,
                                                                                 file_format))
    if exists(test_excluded_file):
        df_test_excluded = DataReader.read_from_file(test_excluded_file, converters=converter_dict)

    test_responses_with_excluded_flags_file = join(output_dir, '{}_test_responses_with_excluded_flags.{}'.format(experiment_id,
                                                                                                                 file_format))
    if exists(test_responses_with_excluded_flags_file):
        df_test_responses_with_excluded_flags = DataReader.read_from_file(test_responses_with_excluded_flags_file,
                                                                          converters=converter_dict)

    test_preproc_file = join(output_dir, '{}_test_preprocessed_features.{}'.format(experiment_id,
                                                                                   file_format))
    if exists(test_preproc_file):
        df_test_preproc = DataReader.read_from_file(test_preproc_file, converters=converter_dict)

    pred_preproc_file = join(output_dir, '{}_pred_processed.{}'.format(experiment_id,
                                                                       file_format))
    if exists(pred_preproc_file):
        df_pred_preproc = DataReader.read_from_file(pred_preproc_file, converters=converter_dict)

    feature_file = join(output_dir, '{}_feature.{}'.format(experiment_id,
                                                           file_format))
    if exists(feature_file):
        df_features = DataReader.read_from_file(feature_file, converters=converter_dict)
        features_used = df_features['feature'].tolist()
        # compute the length of the longest feature name: we'll need it for the plots
        longest_feature_name = max(map(len, features_used))

    betas_file = join(output_dir, '{}_betas.{}'.format(experiment_id,
                                                       file_format))
    if exists(betas_file):
        df_betas = DataReader.read_from_file(betas_file)

    if exists(feature_subset_file):
        df_feature_subset_specs = DataReader.read_from_file(feature_subset_file)
    else:
        df_feature_subset_specs = None
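
If you are inspecting an experiment interactively, a quick sanity check like the following (not part of the standard report) can show which of the intermediate files constructed above were actually found on disk; all of the path variables it uses are defined in the previous cell.

In [ ]:
# Optional sanity check: report which intermediate experiment files exist.
# The paths below are exactly the ones constructed in the cell above.
expected_files = [train_file, train_preproc_file, train_metadata_file,
                  test_file, test_preproc_file, test_metadata_file,
                  test_human_scores_file, pred_preproc_file,
                  feature_file, betas_file]
for path in expected_files:
    status = 'found' if exists(path) else 'missing'
    print('{}: {}'.format(relpath(path, output_dir), status))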

In [ ]:
# check for continuous human scores in the evaluation set
continuous_human_score = False

if exists(pred_preproc_file):
    if not df_pred_preproc['sc1'].equals(np.round(df_pred_preproc['sc1'])):
        continuous_human_score = True
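
As a quick illustration of this check on toy data (hypothetical scores, not part of the report): integer-valued scores are unchanged by rounding, while any fractional score flips the flag.

In [ ]:
# Illustrative sketch with hypothetical scores: rounding leaves integer-valued
# scores unchanged, so only fractional scores mark the column as continuous.
toy_scores = pd.Series([1.0, 2.0, 3.5])
print(not toy_scores.equals(np.round(toy_scores)))   # True -> continuous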