In [39]:
import json
import matplotlib.pyplot as plt
import numpy as np

report_files = [
    "/Users/bking/IdeaProjects/LanguageModelRNN/experiment_results/encdec_noing23_200_512_04drb/encdec_noing23_200_512_04drb.json",
    "/Users/bking/IdeaProjects/LanguageModelRNN/experiment_results/encdec_noing23_bow_200_512_04drb/encdec_noing23_bow_200_512_04drb.json"
]
log_files = [
    "/Users/bking/IdeaProjects/LanguageModelRNN/experiment_results/encdec_noing23_200_512_04drb/encdec_noing23_200_512_04drb_logs.json",
    "/Users/bking/IdeaProjects/LanguageModelRNN/experiment_results/encdec_noing23_bow_200_512_04drb/encdec_noing23_bow_200_512_04drb_logs.json"
]

reports = []
logs = []

# Load each report/log JSON, keyed by its file name (without the .json extension).
for report_file in report_files:
    with open(report_file) as f:
        reports.append((report_file.split('/')[-1].split('.json')[0], json.load(f)))

for log_file in log_files:
    with open(log_file) as f:
        logs.append((log_file.split('/')[-1].split('.json')[0], json.load(f)))

# Print the encoder/decoder architectures recorded in each report.
for report_name, report in reports:
    print '\n', report_name, '\n'
    print 'Encoder: \n', report['architecture']['encoder']
    print 'Decoder: \n', report['architecture']['decoder']
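For reference, here is a rough outline of one report dict as this notebook uses it, inferred purely from the keys accessed in the cells below (the optional 'best_bleu_matches_train'/'_valid'/'_test' keys are omitted); the values are placeholders, not the actual file contents.

# Hypothetical sketch of a report's structure; placeholder values only.
report_outline = {
    'architecture': {'encoder': None, 'decoder': None},
    'train_perplexity': None, 'valid_perplexity': None, 'test_perplexity': None,
    'train_samples': [], 'valid_samples': [], 'test_samples': [],
    'train_bleu': None, 'valid_bleu': None, 'test_bleu': None, 'combined_bleu': None,
    'n_pairs_bleu_train': None, 'n_pairs_bleu_valid': None, 'n_pairs_bleu_test': None,
    'n_pairs_bleu_all': None, 'n_pairs_bleu_gold': None,
    'average_alignment_train': None, 'average_alignment_valid': None,
    'average_alignment_test': None, 'average_alignment_all': None,
    'average_alignment_gold': None,
}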
In [40]:
%matplotlib inline
from IPython.display import HTML, display
def display_table(data):
    display(HTML(
        u'<table><tr>{}</tr></table>'.format(
            u'</tr><tr>'.join(
                u'<td>{}</td>'.format('</td><td>'.join(unicode(_) for _ in row)) for row in data)
        )
    ))
def bar_chart(data):
    # data rows: [model name, train perplexity, valid perplexity, test perplexity]
    n_groups = len(data)
    train_perps = [d[1] for d in data]
    valid_perps = [d[2] for d in data]
    test_perps = [d[3] for d in data]

    fig, ax = plt.subplots(figsize=(10, 8))
    index = np.arange(n_groups)
    bar_width = 0.3
    opacity = 0.4
    error_config = {'ecolor': '0.3'}

    train_bars = plt.bar(index, train_perps, bar_width,
                         alpha=opacity,
                         color='b',
                         error_kw=error_config,
                         label='Training Perplexity')

    valid_bars = plt.bar(index + bar_width, valid_perps, bar_width,
                         alpha=opacity,
                         color='r',
                         error_kw=error_config,
                         label='Valid Perplexity')

    test_bars = plt.bar(index + 2 * bar_width, test_perps, bar_width,
                        alpha=opacity,
                        color='g',
                        error_kw=error_config,
                        label='Test Perplexity')

    plt.xlabel('Model')
    plt.ylabel('Perplexity')
    plt.title('Perplexity by Model and Dataset')
    # Center each group's label under the middle (valid) bar.
    plt.xticks(index + bar_width, [d[0] for d in data])
    plt.legend()

    plt.tight_layout()
    plt.show()
data = [['<b>Model</b>', '<b>Train Perplexity</b>', '<b>Valid Perplexity</b>', '<b>Test Perplexity</b>']]
for rname, report in reports:
    data.append([rname, report['train_perplexity'], report['valid_perplexity'], report['test_perplexity']])
display_table(data)
bar_chart(data[1:])
In [41]:
%matplotlib inline
plt.figure(figsize=(10, 8))
# Each log entry l[k] holds parallel lists: [epochs, train loss, valid loss,
# train perplexity, valid perplexity] (the last two are plotted in the next cell).
for rname, l in logs:
    for k in l.keys():
        plt.plot(l[k][0], l[k][1], label=str(k) + ' ' + rname + ' (train)')
        plt.plot(l[k][0], l[k][2], label=str(k) + ' ' + rname + ' (valid)')
plt.title('Loss v. Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
In [42]:
%matplotlib inline
plt.figure(figsize=(10, 8))
for rname, l in logs:
    for k in l.keys():
        plt.plot(l[k][0], l[k][3], label=str(k) + ' ' + rname + ' (train)')
        plt.plot(l[k][0], l[k][4], label=str(k) + ' ' + rname + ' (valid)')
plt.title('Perplexity v. Epoch')
plt.xlabel('Epoch')
plt.ylabel('Perplexity')
plt.legend()
plt.show()
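If the logged loss is the mean per-token cross-entropy (an assumption; the log files do not state this explicitly), the loss and perplexity curves above are related by perplexity = exp(loss). A quick sanity-check sketch:

# Implied training perplexity from the logged training loss, assuming the loss
# is mean per-token cross-entropy; compare against the logged values in l[k][3].
implied_train_ppl = {rname: {k: np.exp(np.array(l[k][1])) for k in l} for rname, l in logs}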
In [43]:
def print_sample(sample, best_bleu=None):
    # Strip padding from the input and mask tokens from the reference before printing.
    enc_input = ' '.join([w for w in sample['encoder_input'].split(' ') if w != '<pad>'])
    gold = ' '.join([w for w in sample['gold'].split(' ') if w != '<mask>'])
    print('Input: ' + enc_input + '\n')
    print('Generated: ' + sample['generated'] + '\n')
    print('True: ' + gold + '\n')
    if best_bleu is not None:
        cbm = ' '.join([w for w in best_bleu['best_match'].split(' ') if w != '<mask>'])
        print('Closest BLEU Match: ' + cbm + '\n')
        print('Closest BLEU Score: ' + str(best_bleu['best_score']) + '\n')
    print('\n')
def display_sample(samples, best_bleu=False):
    # Render one table per unique encoder input, with each model's output as a row.
    for enc_input in samples:
        data = []
        for rname, sample in samples[enc_input]:
            gold = ' '.join([w for w in sample['gold'].split(' ') if w != '<mask>'])
            data.append([rname, '<b>Generated: </b>' + sample['generated']])
            if best_bleu:
                cbm = ' '.join([w for w in sample['best_match'].split(' ') if w != '<mask>'])
                data.append([rname, '<b>Closest BLEU Match: </b>' + cbm + ' (Score: ' + str(sample['best_score']) + ')'])
        data.insert(0, ['<u><b>' + enc_input + '</b></u>', '<b>True: ' + gold + '</b>'])
        display_table(data)
def process_samples(samples):
    # Consolidate samples with identical inputs across models, merging in the
    # closest-BLEU-match info when it is available.
    result = {}
    for rname, t_samples, t_cbms in samples:
        for i, sample in enumerate(t_samples):
            enc_input = ' '.join([w for w in sample['encoder_input'].split(' ') if w != '<pad>'])
            if t_cbms is not None:
                sample.update(t_cbms[i])
            if enc_input in result:
                result[enc_input].append((rname, sample))
            else:
                result[enc_input] = [(rname, sample)]
    return result
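For illustration only, here is a toy call showing the record format process_samples expects ('encoder_input', 'gold', 'generated', plus optional 'best_match'/'best_score' entries); the model name and strings below are made up.

# Toy illustration of the expected input format (all values hypothetical).
toy = process_samples([
    ('toy_model',
     [{'encoder_input': 'a b c <pad> <pad>', 'gold': 'x y <mask>', 'generated': 'x z'}],
     [{'best_match': 'x y <mask>', 'best_score': 0.5}]),
])
display_sample(toy, best_bleu=True)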
In [44]:
samples = process_samples([(rname, r['train_samples'], r['best_bleu_matches_train'] if 'best_bleu_matches_train' in r else None) for (rname, r) in reports])
display_sample(samples, best_bleu='best_bleu_matches_train' in reports[1][1])
In [45]:
samples = process_samples([(rname, r['valid_samples'], r['best_bleu_matches_valid'] if 'best_bleu_matches_valid' in r else None) for (rname, r) in reports])
display_sample(samples, best_bleu='best_bleu_matches_valid' in reports[1][1])
In [46]:
samples = process_samples([(rname, r['test_samples'], r['best_bleu_matches_test'] if 'best_bleu_matches_test' in r else None) for (rname, r) in reports])
display_sample(samples, best_bleu='best_bleu_matches_test' in reports[1][1])
In [47]:
def print_bleu(bleu_structs):
    data = [['<b>Model</b>', '<b>Overall Score</b>', '<b>1-gram Score</b>', '<b>2-gram Score</b>', '<b>3-gram Score</b>', '<b>4-gram Score</b>']]
    for rname, bleu_struct in bleu_structs:
        data.append([rname, bleu_struct['score'], bleu_struct['components']['1'], bleu_struct['components']['2'], bleu_struct['components']['3'], bleu_struct['components']['4']])
    display_table(data)
In [48]:
# Training Set BLEU Scores
print_bleu([(rname, report['train_bleu']) for (rname, report) in reports])
In [49]:
# Validation Set BLEU Scores
print_bleu([(rname, report['valid_bleu']) for (rname, report) in reports])
In [50]:
# Test Set BLEU Scores
print_bleu([(rname, report['test_bleu']) for (rname, report) in reports])
In [51]:
# All Data BLEU Scores
print_bleu([(rname, report['combined_bleu']) for (rname, report) in reports])
In [52]:
# Training Set n-pairs BLEU Scores
print_bleu([(rname, report['n_pairs_bleu_train']) for (rname, report) in reports])
In [53]:
# Validation Set n-pairs BLEU Scores
print_bleu([(rname, report['n_pairs_bleu_valid']) for (rname, report) in reports])
In [54]:
# Test Set n-pairs BLEU Scores
print_bleu([(rname, report['n_pairs_bleu_test']) for (rname, report) in reports])
In [55]:
# Combined n-pairs BLEU Scores
print_bleu([(rname, report['n_pairs_bleu_all']) for (rname, report) in reports])
In [56]:
# Ground Truth n-pairs BLEU Scores
print_bleu([(rname, report['n_pairs_bleu_gold']) for (rname, report) in reports])
In [57]:
def print_align(reports):
    data = [['<b>Model</b>', '<b>Average (Train) Generated Score</b>', '<b>Average (Valid) Generated Score</b>', '<b>Average (Test) Generated Score</b>', '<b>Average (All) Generated Score</b>', '<b>Average (Gold) Score</b>']]
    for rname, report in reports:
        data.append([rname, report['average_alignment_train'], report['average_alignment_valid'], report['average_alignment_test'], report['average_alignment_all'], report['average_alignment_gold']])
    display_table(data)

print_align(reports)