In [ ]:
import os
import sys
import pandas
smooth = pandas.rolling_mean  # removed in pandas 0.18; see the compatibility cell below
sys.path += [os.getenv('HOME') + path for path in 
             ['/Dist/fully-neural-lvsr',
              '/Dist/fully-neural-lvsr/libs/Theano',
              '/Dist/fully-neural-lvsr/libs/blocks']]
from blocks.serialization import load, load_parameters
from matplotlib import pyplot
from pprint import pprint
%matplotlib inline
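
Note: pandas.rolling_mean was removed in pandas 0.18. If the notebook is run with a newer pandas, the smooth assignment above fails; the cell below is a compatibility sketch (not part of the original setup) that defines an equivalent smooth for newer pandas.

In [ ]:
# Compatibility sketch: only needed with pandas >= 0.18, where
# pandas.rolling_mean no longer exists.
if not hasattr(pandas, 'rolling_mean'):
    def smooth(series, window):
        # Same behaviour as the old pandas.rolling_mean(series, window).
        return series.rolling(window).mean()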

In [ ]:
from lvsr.datasets.text import char2code, code2char

In [ ]:
cd /data/lisatmp4/bahdanau/autoencoder3/

In [ ]:
logs = {}
dfs = {}

In [ ]:
def load_model(path):
    # Load just the training log from the checkpoint archive.
    log = load(open(path, 'rb'), name='log')
    df = pandas.DataFrame.from_dict(log, orient='index')
    # Key the model by its path with the '.tar' extension stripped.
    logs[path[:-4]] = log
    dfs[path[:-4]] = df
    print log.status['best_valid_mean_total_reward'], log.status['best_valid_per']
    
def compare_rewards_and_errors(models, s=slice(None)):
    pyplot.figure(figsize=(15, 10))
    legend = []
    for m in models:
        dfs[m].mean_total_reward.astype('float32').dropna().loc[s].plot()
        legend += ['train_' + m]        
        if 'valid_mean_total_reward' in dfs[m]:
            dfs[m].valid_mean_total_reward.astype('float32').dropna().loc[s].plot(ls='--')
            legend += ['valid_' + m]
    pyplot.ylim(ymin=0)
    pyplot.legend(legend, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Reward')
    pyplot.show()
    
    pyplot.figure(figsize=(10, 5))
    for m in models:
        dfs[m].readout_costs_mean_critic_cost.astype('float32').dropna().loc[s].plot()
    pyplot.legend(models, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Critic error')
    pyplot.show()
    
    pyplot.figure(figsize=(10, 5))
    for m in models:
        dfs[m].readout_costs_mean_actor_cost.astype('float32').dropna().loc[s].plot()
    pyplot.legend(models, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Actor error')
    pyplot.show()    
    
def compare_per(models, s=slice(None)):
    pyplot.figure(figsize=(10, 5))
    legend = []
    for m in models:
        dfs[m].valid_per.astype('float32').dropna().loc[s].plot()
        legend += [m]
    pyplot.legend(legend, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("PER")
    pyplot.title('Validation PER')
    
def compare_gradient_norms(models):
    legend = []
    for m in models:
        dfs[m].total_gradient_norm.astype('float32').dropna().plot(use_index=False)
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Gradient norm')

def compare_max_adjustments(models):
    legend = []
    for m in models:
        dfs[m].readout_costs_max_adjustment.astype('float32').dropna().plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Max adjustment')
    
def compare_weight_entropy(models):
    legend = []
    for m in models:
        dfs[m].average_weights_entropy_per_label.astype('float32').dropna().plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Weight entropy')
    pyplot.show()
    
def compare_entropies(models, s=slice(None)):
    legend = []
    for m in models:
        dfs[m].readout_costs_mean_actor_entropy.astype('float32').dropna().loc[s].plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Actor entropy')
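
The curves drawn by these helpers are raw per-iteration values and can be noisy; the smooth helper from the first cell (a rolling mean) can be applied to a series before plotting. The cell below is a usage sketch, not part of the original comparison code, and the window of 100 iterations is an arbitrary choice.

In [ ]:
def plot_smoothed_reward(m, window=100):
    # Sketch: rolling-mean version of the training reward curve for one
    # already-loaded model; the window size is arbitrary.
    series = dfs[m].mean_total_reward.astype('float32').dropna()
    smooth(series, window).plot()
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Smoothed mean total reward: ' + m)
    pyplot.show()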

Pretraining


In [ ]:
load_model('actor_critic4/critic_pretraining.tar')
load_model('actor_critic4a/critic_pretraining.tar')
load_model('actor_critic4b/critic_pretraining.tar')
# load_model('actor_critic4c/critic_pretraining.tar')
# load_model('actor_critic4d/critic_pretraining.tar')
load_model('actor_critic4e/critic_pretraining.tar')

In [ ]:
compare_rewards_and_errors(
    ['actor_critic4/critic_pretraining',
     'actor_critic4a/critic_pretraining',
     'actor_critic4b/critic_pretraining',
     'actor_critic4e/critic_pretraining',
    ])

In [ ]:
load_model('actor_critic5/critic_pretraining.tar')
load_model('actor_critic5a/critic_pretraining.tar')
# load_model('actor_critic5b/critic_pretraining.tar')
load_model('actor_critic5c/critic_pretraining.tar')
load_model('actor_critic6/critic_pretraining.tar')

In [ ]:
compare_rewards_and_errors(
    ['actor_critic5/critic_pretraining',
     'actor_critic5a/critic_pretraining',
     'actor_critic5c/critic_pretraining',
     'actor_critic6/critic_pretraining',
    ])

Main


In [ ]:
load_model('actor_critic4/main.tar')
load_model('actor_critic4a/main.tar')
load_model('actor_critic4b/main.tar')
load_model('actor_critic4c/main.tar')
load_model('actor_critic4d/main.tar')
load_model('actor_critic4e/main.tar')

In [ ]:
compare_rewards_and_errors(
     [
        'actor_critic4/main',
        'actor_critic4a/main',
        'actor_critic4b/main',
        'actor_critic4c/main',
        'actor_critic4d/main',
        'actor_critic4e/main',
     ])

In [ ]:
compare_per(
    ['actor_critic4/main',
     'actor_critic4a/main',
     'actor_critic4b/main',
     'actor_critic4c/main',
     'actor_critic4d/main',
     'actor_critic4e/main',])

In [ ]:
load_model('actor_critic5/main.tar')
load_model('actor_critic5a/main.tar')
load_model('actor_critic5b/main.tar')
load_model('actor_critic5c/main.tar')
load_model('actor_critic6/main.tar')

In [ ]:
# These two runs are used by the comparison cells below, so load them as well.
load_model('actor_critic7/main.tar')
load_model('actor_critic7a/main.tar')
load_model('actor_critic7b/main.tar')
load_model('actor_critic7c/main.tar')
load_model('actor_critic7d/main.tar')

In [ ]:
compare_rewards_and_errors(
     [
        'actor_critic5/main',
        'actor_critic5a/main',
        'actor_critic5b/main',
        'actor_critic5c/main',
        'actor_critic6/main',
     ])

In [ ]:
compare_rewards_and_errors(
    ['actor_critic7/main', 
     'actor_critic7a/main', 
     'actor_critic7b/main',
     'actor_critic7c/main',
     'actor_critic7d/main'])

In [ ]:
compare_per(
     [
        #'actor_critic5/main',
        #'actor_critic5a/main',
        #'actor_critic5c/main',
        'actor_critic7/main',
        'actor_critic7a/main',
        'actor_critic7b/main',
        'actor_critic7c/main',
        'actor_critic7d/main',
     ])

In [ ]:
compare_entropies(
    [
        'actor_critic4/main',
        'actor_critic4e/main',
        'actor_critic5/main',
        'actor_critic5a/main',
        'actor_critic7/main',
        'actor_critic7a/main',
        'actor_critic7b/main',
        'actor_critic7c/main',        
    ],
)

Analysis


In [ ]:
def print_critic_suggestions(it, i, just_from_groundtruth=False, n_best=5):
    print "Groundtruth: ", groundtruth[it][i]
    print "Prediction: ", predictions[it][i]
    print "Rewards: ", rewards[it][:, i]
    groundtruth_codes = set(char2code[char] for char in groundtruth[it][i])
    prediction_chars = predictions[it][i]
    for step in range(len(prediction_chars)):
        actions = enumerate(values[it][step, i])
        if just_from_groundtruth:
            actions = [(n, q) for n, q in actions if n in groundtruth_codes]
        best = list(sorted(actions, key=lambda (n, q): -q))
        print [(code2char[c], q, probs[it][step, i, c]) for c, q in best[:n_best]]
        if prediction_chars[step] == '$':
            break
            
def show_probs(it, i):
    pyplot.matshow(probs[it][:, i])
    pyplot.colorbar()
    pyplot.show()            
            
def show_values(it, i):
    pyplot.matshow(values[it][:, i])
    pyplot.colorbar()
    pyplot.show()

In [ ]:
load_model('actor_critic7/critic_pretraining.tar')

In [ ]:
log = logs['actor_critic7/critic_pretraining']

In [ ]:
iterations_done = log.status['iterations_done']
print iterations_done
print log.status['_config']

def channel(name):
    # Collect one monitored channel over all training iterations
    # (None for iterations where it was not recorded).
    return [log[t].get(name) for t in range(iterations_done)]

train_cost = channel('train_cost')
rewards = channel('readout_costs_rewards')
mean_reward = channel('mean_total_reward')
critic_cost = channel('readout_costs_mean_critic_cost')
actor_cost = channel('readout_costs_mean_actor_cost')

inputs = channel('average_inputs')
predictions = channel('average_predictions')
prediction_masks = channel('readout_costs_prediction_mask')
groundtruth = channel('average_groundtruth')

value_biases = channel('readout_costs_value_biases')
values = channel('readout_costs_values')
probs = channel('readout_costs_probs')
outputs = channel('readout_costs_outputs')

prediction_values = channel('readout_costs_prediction_values')
prediction_outputs = channel('readout_costs_prediction_outputs')

value_targets = channel('readout_costs_value_targets')
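
The analysis helpers above (print_critic_suggestions, show_probs, show_values) index these arrays as (time step, batch example), with a trailing character dimension for values and probs. The cell below is a quick sanity check of that layout, a sketch that assumes the detailed channels were recorded for at least one iteration.

In [ ]:
# Sketch: find the most recent iteration where the detailed channels were
# recorded and print their shapes. Expected layout:
# values/probs -> (time, batch, characters); rewards/value_targets -> (time, batch).
it_check = max(t for t in range(log.status['iterations_done'])
               if values[t] is not None)
print 'iteration', it_check
for name, arrays in [('values', values), ('probs', probs),
                     ('rewards', rewards), ('value_targets', value_targets)]:
    if arrays[it_check] is not None:
        print name, arrays[it_check].shape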

In [ ]:
it = 29800 # training iteration
print mean_reward[it]
#print rewards[it].sum(axis=0)
print train_cost[it], critic_cost[it], actor_cost[it]
print "Groundtruth"
pprint(groundtruth[it])
print "Predictions"
pprint(predictions[it])

In [ ]:
i = 3
print_critic_suggestions(it, i)
show_probs(it, i)
show_values(it, i)

In [ ]:
print prediction_values[it][:, i][:-3]
print [probs[it][t, i, char2code[c]] for t, c in enumerate(predictions[it][i])][:-3]
print value_targets[it][:, i][:-3]
print ((prediction_values[it][:, i] - value_targets[it][:, i]) ** 2)[:-3].sum()
print (probs[it][:, i] * values[it][:, i]).sum(axis=-1)[1:]

In [ ]: