In [ ]:
report_file = 'reports/encdec_200_512_2.json'
log_file = 'logs/encdec_200_512_logs.json'
import json
import matplotlib.pyplot as plt
with open(report_file) as f:
report = json.loads(f.read())
with open(log_file) as f:
logs = json.loads(f.read())
print'Encoder: \n\n', report['architecture']['encoder']
print'Decoder: \n\n', report['architecture']['decoder']
In [ ]:
print('Train Perplexity: ', report['train_perplexity'])
print('Valid Perplexity: ', report['valid_perplexity'])
print('Test Perplexity: ', report['test_perplexity'])
In [ ]:
%matplotlib inline
for k in logs.keys():
plt.plot(logs[k][0], logs[k][1], label=str(k) + ' (train)')
plt.plot(logs[k][0], logs[k][2], label=str(k) + ' (valid)')
plt.title('Loss v. Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
In [ ]:
%matplotlib inline
for k in logs.keys():
plt.plot(logs[k][0], logs[k][3], label=str(k) + ' (train)')
plt.plot(logs[k][0], logs[k][4], label=str(k) + ' (valid)')
plt.title('Perplexity v. Epoch')
plt.xlabel('Epoch')
plt.ylabel('Perplexity')
plt.legend()
plt.show()
In [ ]:
def print_sample(sample, best_bleu=None):
enc_input = ' '.join([w for w in sample['encoder_input'].split(' ') if w != '<pad>'])
gold = ' '.join([w for w in sample['gold'].split(' ') if w != '<mask>'])
print('Input: '+ enc_input + '\n')
print('Gend: ' + sample['generated'] + '\n')
print('True: ' + gold + '\n')
if best_bleu is not None:
cbm = ' '.join([w for w in best_bleu['best_match'].split(' ') if w != '<mask>'])
print('Closest BLEU Match: ' + cbm + '\n')
print('Closest BLEU Score: ' + str(best_bleu['best_score']) + '\n')
print('\n')
In [ ]:
for i, sample in enumerate(report['train_samples']):
print_sample(sample, report['best_bleu_matches_train'][i] if 'best_bleu_matches_train' in report else None)
In [ ]:
for i, sample in enumerate(report['valid_samples']):
print_sample(sample, report['best_bleu_matches_valid'][i] if 'best_bleu_matches_valid' in report else None)
In [ ]:
for i, sample in enumerate(report['test_samples']):
print_sample(sample, report['best_bleu_matches_test'][i] if 'best_bleu_matches_test' in report else None)
In [1]:
def print_bleu(blue_struct):
print 'Overall Score: ', blue_struct['score'], '\n'
print '1-gram Score: ', blue_struct['components']['1']
print '2-gram Score: ', blue_struct['components']['2']
print '3-gram Score: ', blue_struct['components']['3']
print '4-gram Score: ', blue_struct['components']['4']
In [ ]:
# Training Set BLEU Scores
print_bleu(report['train_bleu'])
In [ ]:
# Validation Set BLEU Scores
print_bleu(report['valid_bleu'])
In [ ]:
# Test Set BLEU Scores
print_bleu(report['test_bleu'])
In [ ]:
# All Data BLEU Scores
print_bleu(report['combined_bleu'])
In [ ]:
# Training Set BLEU n-pairs Scores
print_bleu(report['n_pairs_bleu_train'])
In [ ]:
# Validation Set n-pairs BLEU Scores
print_bleu(report['n_pairs_bleu_valid'])
In [ ]:
# Test Set n-pairs BLEU Scores
print_bleu(report['n_pairs_bleu_test'])
In [ ]:
# Combined n-pairs BLEU Scores
print_bleu(report['n_pairs_bleu_all'])
In [ ]:
# Ground Truth n-pairs BLEU Scores
print_bleu(report['n_pairs_bleu_gold'])
In [ ]:
print 'Average (Train) Generated Score: ', report['average_alignment_train']
print 'Average (Valid) Generated Score: ', report['average_alignment_valid']
print 'Average (Test) Generated Score: ', report['average_alignment_test']
print 'Average (All) Generated Score: ', report['average_alignment_all']
print 'Average Gold Score: ', report['average_alignment_gold']