In [1]:
%load_ext autoreload
In [2]:
%autoreload 2
In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import getpass
from IPython.display import display
import json
import nltk
import numpy as np
import pandas as pd
import pkg_resources
import os
import random
import re
import seaborn as sns
import sklearn.metrics as metrics
import tensorflow as tf
from tensorflow.python.lib.io import file_io
In [4]:
from utils_export.dataset import Dataset, Model
from utils_export import utils_cloudml
from utils_export import utils_tfrecords
In [5]:
# Disable the GCS read cache; this makes reading files from GCS faster:
# https://github.com/tensorflow/tensorflow/issues/15530
os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'
In [6]:
nltk.download('punkt')
In [7]:
def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize (UTF-8 encoded bytes).
    lowercase: whether to lowercase the words as part of preprocessing (bool).

  Returns:
    A list of strings (words).
  """
  words = nltk.word_tokenize(text.decode('utf-8'))
  if lowercase:
    words = [w.lower() for w in words]
  return words
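As a quick sanity check (not part of the original analysis), the tokenizer can be exercised on a UTF-8 encoded byte string; the expected behavior is lowercased word and punctuation tokens.
In [ ]:
# Illustrative check of the tokenizer above (assumes the nltk 'punkt' data is downloaded).
example = u'This is NOT okay.'.encode('utf-8')
print(tokenizer(example))                    # e.g. ['this', 'is', 'not', 'okay', '.']
print(tokenizer(example, lowercase=False))   # e.g. ['This', 'is', 'NOT', 'okay', '.']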
In [8]:
def make_test_input_fn(dataset_path,
                       model_text_feature,
                       dataset_text_feature,
                       data_label,
                       tokenizer_fn,
                       label_data_type=tf.float32,
                       max_n_examples=None,
                       random_filter_keep_rate=1.0):
  """Returns a test input function.

  Args:
    dataset_path (str): Path to the dataset (TFRecord files).
    model_text_feature (str): Name of the text feature column the model expects.
    dataset_text_feature (str): Name of the text feature in the dataset.
    data_label (str): Name of the label feature in the dataset.
    tokenizer_fn: Tokenizer function (str -> list of str). If None, each text is
      wrapped in a single-element list instead of being tokenized.
    label_data_type: tf.DType of the label feature (default tf.float32).
    max_n_examples (int): Maximum number of examples to evaluate on.
    random_filter_keep_rate (float): Keep each test example with this probability.

  Returns:
    Test input function that returns a DataFrame.
  """
  decoding_input_features = {
      dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),
      data_label: tf.FixedLenFeature([], dtype=label_data_type)
  }

  def test_input_fn(max_n_examples=max_n_examples,
                    random_filter_keep_rate=random_filter_keep_rate):
    """Test input function.

    Args:
      max_n_examples (int): Maximum number of examples to evaluate on.
      random_filter_keep_rate (float): Keep each test example with this probability.

    Returns:
      DataFrame with the decoded examples.
    """
    res = utils_tfrecords.decode_tf_records_to_pandas(
        decoding_input_features,
        dataset_path,
        max_n_examples,
        random_filter_keep_rate)
    if not tokenizer_fn:
      # Sentence-level models expect the raw text wrapped in a one-element list.
      tok = lambda x: [x]
      res[model_text_feature] = list(map(tok, res[dataset_text_feature]))
    else:
      res[model_text_feature] = list(map(tokenizer_fn, res[dataset_text_feature]))
    res = res.rename(columns={data_label: 'label'})
    res['label'] = list(map(lambda x: bool(round(x)), list(res['label'])))
    final = res.copy(deep=True)
    return final

  return test_input_fn
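The mapping step above is the only part that differs between the word-level models and the TF-Hub sentence models. A minimal sketch of that step on a toy in-memory DataFrame (assuming plain Python strings rather than TFRecord bytes; the column names are illustrative):
In [ ]:
# Sketch of the text-feature mapping inside test_input_fn, on toy data.
toy = pd.DataFrame({'comment_text': [u'Good point!', u'This is NOT okay.']})
# Word-level models (tokenizer_fn provided): a list of tokens per example.
toy['tokens'] = list(map(lambda t: tokenizer(t.encode('utf-8')), toy['comment_text']))
# Sentence-level models (tokenizer_fn=None): the raw text wrapped in a one-element list.
toy['text'] = list(map(lambda t: [t], toy['comment_text']))
display(toy)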
In [9]:
def print_results(results_df, model_names, print_pr_curve=False):
  """Prints the classification results.

  Args:
    results_df: DataFrame with a 'label' column and one score column per model.
    model_names: List of strings naming the models for which we have results.
    print_pr_curve: Currently unused.
  """
  labels = results_df['label']
  for _model in model_names:
    print(_model)
    model_preds = results_df[_model]
    fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)
    roc_auc = metrics.auc(fpr, tpr)
    # precision_recall_curve returns (precision, recall, thresholds).
    precisions, recalls, thr = metrics.precision_recall_curve(labels, model_preds)
    pr_auc = metrics.auc(recalls, precisions)
    model_preds_binary = (model_preds > 0.5).astype(np.int_)
    f1 = metrics.f1_score(labels, model_preds_binary)
    print('\tROC AUC: {}'.format(roc_auc))
    print('\tPR AUC: {}'.format(pr_auc))
    print('\tF1: {}'.format(f1))
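For reference, a tiny synthetic example (not real model output) of the DataFrame layout print_results expects: a boolean 'label' column plus one score column per model name.
In [ ]:
# Synthetic illustration of the expected input format for print_results.
_toy_scores = pd.DataFrame({
    'label': [True, False, True, False, True, False],
    'toy_model': [0.9, 0.2, 0.7, 0.4, 0.6, 0.1],
})
print_results(_toy_scores, ['toy_model'])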
In [10]:
PROJECT_NAME = 'conversationai-models'
SENTENCE_KEY = 'comment_key'  # Input key.
# Pattern for path of tf_records
OUTPUT_DIR_BASE = os.path.join(
'gs://conversationai-models',
getpass.getuser(),
'tfrecords')
In [175]:
LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'
DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'
DATA_LABEL = 'toxicity'
DATASET_TEXT_FEATURE='comment_text'
# Pattern for path of tf_records
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')
In [140]:
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
'tf_cnn_civil_comments_glove:v_20190219_185541',
'tf_gru_attention_civil_comments_glove:v_20190219_185619',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(10000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [143]:
civil_comments_test_df = test_dataset.show_data()
In [144]:
print_results(civil_comments_test_df, MODEL_NAMES)
In [189]:
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(10000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [193]:
civil_comments_hub_df = test_dataset.show_data()
In [194]:
print_results(civil_comments_hub_df, MODEL_NAMES)
In [195]:
LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'
DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'
DATA_LABEL = 'frac_neg'
DATASET_TEXT_FEATURE='comment_text'
# Pattern for path of tf_records
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')
In [162]:
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
'tf_cnn_toxicity_glove:v_20190219_185532',
'tf_gru_attention_toxicity_glove:v_20190219_185516',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(10000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [149]:
toxicity_test_df1 = test_dataset.show_data()
In [150]:
print_results(toxicity_test_df1, MODEL_NAMES)
In [196]:
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(10000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [200]:
toxicity_test_df2 = test_dataset.show_data()
In [201]:
print_results(toxicity_test_df2, MODEL_NAMES)
In [23]:
LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'
DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers_all_columns_nthain.tfrecord'
DATA_LABEL = 'removed'
DATASET_TEXT_FEATURE='comment_text'
# Pattern for path of tf_records
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')
In [14]:
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
'tf_cnn_many_communities_glove:v_20190219_185551_gpu_p100_4',
#'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',
#'tf_gru_attention_many_communities:v20190315_161037_23271_1555129264',
'tf_gru_attention_many_communities:v20190705_004839_507000_1562364428_gpu_p100_4',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(100000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [318]:
many_communities_test_df = test_dataset.show_data()
In [319]:
print_results(many_communities_test_df, MODEL_NAMES)
In [24]:
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221_gpu_v100_4',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
test_input_fn = make_test_input_fn(
DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
test_dataset.load_data(10000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [16]:
many_communities_tfhub_test_df = test_dataset.show_data()
In [ ]:
print_results(many_communities_tfhub_test_df, MODEL_NAMES)
In [22]:
LABEL_NAME_PREDICTION_MODEL = 'label/logistic'
DATASET_VALID = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/validation_query..tfrecord'
DATASET_TEST = 'gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/test_query..tfrecord'
DATA_LABEL = 'label'
DATASET_TEXT_FEATURE='text'
In [20]:
# Pattern for path of tf_records
OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/valid')
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/pessimistic/test')
In [23]:
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
'tf_cnn_many_communities_40_per_8_shot_pessimistic:v20190723_110543_2800_1563906804_gpu_k80_1',
'tf_gru_attention_many_communities_40_per_8_shot_pessimistic:v20190723_110533_4400_1563906956_gpu_k80_1',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
valid_input_fn = make_test_input_fn(
DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer, label_data_type=tf.int64)
test_input_fn = make_test_input_fn(
DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
valid_dataset.load_data(100000000)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
test_dataset.load_data(100000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [27]:
print_results(valid_dataset.show_data(), MODEL_NAMES)
In [25]:
print_results(test_dataset.show_data(), MODEL_NAMES)
In [26]:
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
'tf_hub_classifier_many_communities_40_per_8_shot_pessimistic:v20190723_110557_2600_1563911706_gpu_k80_1',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
valid_input_fn = make_test_input_fn(
DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None, label_data_type=tf.int64)
test_input_fn = make_test_input_fn(
DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
valid_dataset.load_data(100000000)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
test_dataset.load_data(100000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [30]:
print_results(valid_dataset.show_data(), MODEL_NAMES)
In [31]:
print_results(test_dataset.show_data(), MODEL_NAMES)
In [14]:
# Pattern for path of tf_records
OUTPUT_DIR_VALID = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/valid')
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR_BASE, 'many_communities_40_per_8_shot/optimistic/test')
In [15]:
MODEL_TEXT_FEATURE = 'tokens'
MODEL_NAMES = [
'tf_cnn_many_communities_40_per_8_shot_optimistic:v20190723_110516_4200_1563906960_gpu_k80_1',
'tf_gru_attention_many_communities_40_per_8_shot_optimistic:v20190723_110524_4200_1563907005_gpu_k80_1',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
valid_input_fn = make_test_input_fn(
DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer, label_data_type=tf.int64)
test_input_fn = make_test_input_fn(
DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, tokenizer, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
valid_dataset.load_data(100000000)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
test_dataset.load_data(100000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [37]:
print_results(valid_dataset.show_data(), MODEL_NAMES)
In [38]:
print_results(test_dataset.show_data(), MODEL_NAMES)
In [18]:
MODEL_TEXT_FEATURE = 'text'
MODEL_NAMES = [
'tf_hub_classifier_many_communities_40_per_8_shot_optimistic:v20190723_102555_3600_1563909345_gpu_k80_1',
]
model_input_spec = {
MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}
model = Model(
feature_keys_spec=model_input_spec,
prediction_keys=LABEL_NAME_PREDICTION_MODEL,
example_key=SENTENCE_KEY,
model_names=MODEL_NAMES,
project_name=PROJECT_NAME)
valid_input_fn = make_test_input_fn(
DATASET_VALID, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None, label_data_type=tf.int64)
test_input_fn = make_test_input_fn(
DATASET_TEST, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
DATA_LABEL, None, label_data_type=tf.int64)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
valid_dataset = Dataset(valid_input_fn, OUTPUT_DIR_VALID)
valid_dataset.load_data(100000000)
In [ ]:
# Need to set seed before loading data to be able to reload same data in the future
random.seed(2018)
test_dataset = Dataset(test_input_fn, OUTPUT_DIR_TEST)
test_dataset.load_data(100000000)
In [ ]:
# Set recompute_predictions=False to save time if predictions are available.
valid_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)
In [22]:
print_results(valid_dataset.show_data(), MODEL_NAMES)
In [23]:
print_results(test_dataset.show_data(), MODEL_NAMES)
In [11]:
import csv
import matplotlib.pyplot as plt
from sklearn.utils import fixes
In [12]:
def get_list_results_files(parent_dir):
  """Gets the paths of all results files under parent_dir."""
  file_list = []
  for subdirectory, _, files in tf.gfile.Walk(parent_dir):
    for fname in files:
      file_list.append(os.path.join(subdirectory, fname))
  return file_list
In [22]:
def load_csv_predictions(pred_file, is_test=False):
  """Loads a CSV file with predictions and labels.

  Args:
    pred_file: Path to the CSV file (no header row).
    is_test: If True, columns are ordered (community, label, pred);
      otherwise (label, pred, community).

  Returns:
    Tuple of (labels, model_predictions, communities) as numpy arrays.
  """
  names = ['label', 'pred', 'community']
  if is_test:
    names = ['community', 'label', 'pred']
  with file_io.FileIO(pred_file, 'r') as f:
    df = pd.read_csv(f, header=None, names=names)
  labels = df['label'].values
  model_predictions = df['pred'].values
  communities = df['community'].values
  return labels, model_predictions, communities
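A small illustration of the two column orders load_csv_predictions handles, using a temporary local CSV (hypothetical data, not one of the real results files):
In [ ]:
# Hypothetical example: validation-style files are (label, pred, community),
# test-style files are (community, label, pred).
_tmp_path = '/tmp/toy_validation_results.csv'
with file_io.FileIO(_tmp_path, 'w') as f:
  f.write('1,0.83,community_a\n0,0.12,community_b\n')
labels, preds, communities = load_csv_predictions(_tmp_path, is_test=False)
print(labels, preds, communities)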
In [14]:
def plot_pr_curve(precisions, recalls, identifier=None):
  """Plots the precision/recall curve.

  Args:
    precisions: Precisions at all score thresholds.
    recalls: Recalls at all score thresholds.
    identifier: Optional string describing what this curve is for.
  """
  precision_recall_auc = metrics.auc(recalls, precisions)
  plt.figure()
  # Older matplotlib versions don't support the `step` kwarg of fill_between.
  step_kwargs = ({
      'step': 'post'
  } if 'step' in fixes.signature(plt.fill_between).parameters else {})
  plt.step(recalls, precisions, color='b', alpha=0.2, where='post')
  plt.fill_between(recalls, precisions, alpha=0.2, color='b', **step_kwargs)
  plt.xlabel('Recall')
  plt.ylabel('Precision')
  plt.ylim([0.0, 1.05])
  plt.xlim([0.0, 1.0])
  if identifier:
    plt.title('PR curve for %s (AUC = %.2f).' % (
        identifier, precision_recall_auc))
  else:
    plt.title('PR curve (AUC = %.2f).' % precision_recall_auc)
  plt.show()
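A quick way to exercise plot_pr_curve on synthetic scores (illustrative only):
In [ ]:
# Illustrative call with random scores; real usage passes precision_recall_curve
# output computed from model predictions, as in compute_metrics_from_dir below.
_rng = np.random.RandomState(0)
_toy_labels = _rng.randint(0, 2, size=200)
_toy_scores = np.clip(_toy_labels * 0.6 + _rng.uniform(size=200) * 0.5, 0.0, 1.0)
_p, _r, _ = metrics.precision_recall_curve(_toy_labels, _toy_scores)
plot_pr_curve(_p, _r, identifier='synthetic example')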
In [25]:
def compute_metrics_from_dir(results_dir, is_test=False):
  """Computes and prints metrics for every results file under results_dir."""
  files = get_list_results_files(results_dir)
  for file_path in files:
    curr_trial_name = os.path.basename(file_path)
    print(curr_trial_name)
    labels, model_preds, communities = load_csv_predictions(file_path, is_test)
    fpr, tpr, thresholds = metrics.roc_curve(labels, model_preds)
    roc_auc = metrics.auc(fpr, tpr)
    precisions, recalls, thr = metrics.precision_recall_curve(labels, model_preds)
    pr_auc = metrics.auc(recalls, precisions)
    model_preds_binary = (model_preds > 0.5).astype(np.int_)
    f1 = metrics.f1_score(labels, model_preds_binary)
    print('\tROC AUC: {}'.format(roc_auc))
    print('\tPR AUC: {}'.format(pr_auc))
    print('\tF1: {}'.format(f1))
    plot_pr_curve(precisions, recalls, curr_trial_name)
In [16]:
TF_CNN_VALID_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/validation"
TF_GRU_VALID_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/validation"
TF_HUB_VALID_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/validation"
In [ ]:
compute_metrics_from_dir(TF_CNN_VALID_RESULTS_DIR)
In [ ]:
compute_metrics_from_dir(TF_GRU_VALID_RESULTS_DIR)
In [ ]:
compute_metrics_from_dir(TF_HUB_VALID_RESULTS_DIR)
In [26]:
TF_CNN_TEST_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_cnn/test"
TF_GRU_TEST_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_gru_attention/test"
TF_HUB_TEST_RESULTS_DIR = "gs://conversationai-models/resources/transfer_learning_data/many_communities_40_per_8_shot/results/tf_hub_classifier/test"
In [ ]:
compute_metrics_from_dir(TF_CNN_TEST_RESULTS_DIR, is_test=True)
In [ ]:
compute_metrics_from_dir(TF_GRU_TEST_RESULTS_DIR, is_test=True)
In [ ]:
compute_metrics_from_dir(TF_HUB_TEST_RESULTS_DIR, is_test=True)