In [966]:
import os
import sys
import numpy
import pandas
import IPython
# Shorthand for smoothing noisy training curves (old pandas API).
smooth = pandas.rolling_mean
sys.path += [os.getenv('HOME') + path for path in 
             ['/Dist/fully-neural-lvsr',
              '/Dist/fully-neural-lvsr/libs/Theano',
              '/Dist/fully-neural-lvsr/libs/blocks']]
from blocks.serialization import load, load_parameters
from matplotlib import pyplot
from pprint import pprint
from lvsr.datasets.mt import H5PyMTDataset
%matplotlib inline
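
A quick sketch of what the `smooth` helper above does (old pandas `rolling_mean` API; the toy series is only an illustration):

# 5-point rolling mean of a toy series; the first window-1 entries come out as NaN
print smooth(pandas.Series([1., 2., 3., 4., 5., 6.]), 5)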

In [790]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))



In [2]:
cd /data/lisatmp4/bahdanau/mt2


/data/lisatmp4/bahdanau/mt2

In [3]:
logs = {}
dfs = {}

In [855]:
def load_model(path):
    """Load a checkpoint's training log, cache it by experiment name,
    and print a few summary statistics."""
    log = load(open(path), name='log')
    df = pandas.DataFrame.from_dict(log, orient='index')
    name = path[:-4] if path[-3:] == 'tar' else path
    logs[name] = log
    dfs[name] = df
    if 'best_valid_train_cost' in log.status:
        print 'best_valid_train_cost', log.status['best_valid_train_cost']
    if 'best_valid_mean_total_reward' in log.status:
        print 'best_valid_mean_total_reward', log.status['best_valid_mean_total_reward']
    if 'mean_total_reward' in dfs[name]:
        print 'mean_total_reward:', dfs[name].mean_total_reward[-10:].mean()
    
def compare_log_likelihood(models, s=slice(None)):
    pyplot.figure(figsize=(10, 5))
    legend = []
    for m in models:
        dfs[m].train_cost.astype('float32').dropna().loc[s].plot()
        dfs[m].valid_train_cost.astype('float32').dropna().loc[s].plot(ls='--')
        legend += ['train_' + m]
        legend += ['valid_' + m]
    pyplot.legend(legend)
    
def compare_actor_critic_costs(models, s=slice(None)):
    pyplot.figure(figsize=(10, 5))
    legend = []
    for m in models:
        dfs[m].readout_costs_mean_critic_cost.astype('float32').dropna().loc[s].plot()
        legend += [m]
    pyplot.legend(legend)    
    
def compare_rewards_and_errors(models, s=slice(None)):
    pyplot.figure(figsize=(15, 10))
    legend = []
    for m in models:
        dfs[m].mean_total_reward.astype('float32').dropna().loc[s].plot()
        legend += ['train_' + m]        
        if 'valid_mean_total_reward' in dfs[m]:
            dfs[m].valid_mean_total_reward.astype('float32').dropna().loc[s].plot(ls='--')
            legend += ['valid_' + m]
    pyplot.ylim(ymin=0)
    pyplot.legend(legend, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Reward')
    pyplot.show()
    
    pyplot.figure(figsize=(15, 5))
    for m in models:
        dfs[m].readout_costs_mean_critic_cost.astype('float32').dropna().loc[s].plot()
    pyplot.legend(models, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Critic error')
    pyplot.show()
    
    pyplot.figure(figsize=(15, 5))
    for m in models:
        dfs[m].readout_costs_mean_actor_cost.astype('float32').dropna().loc[s].plot()
    pyplot.legend(models, loc='best')
    pyplot.xlabel("Iterations")
    pyplot.ylabel("Reward")
    pyplot.title('Actor error')
    pyplot.show()    
   
def compare_per(models, s=slice(None)):
    pyplot.figure(figsize=(10, 5))
    legend = []
    for m in models:
        dfs[m].valid_per.astype('float32').dropna().loc[s].plot(ls='--')
        legend += [m]
    pyplot.legend(legend)    
    
def compare_gradient_norms(models):
    legend = []
    for m in models:
        dfs[m].total_gradient_norm.astype('float32').dropna().plot(use_index=False)
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Gradient norm')

def compare_max_adjustments(models):
    legend = []
    for m in models:
        dfs[m].readout_costs_max_adjustment.astype('float32').dropna().plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Max adjustment')
    
def compare_weight_entropy(models):
    legend = []
    for m in models:
        dfs[m].average_weights_entropy_per_label.astype('float32').dropna().plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Weight entropy')
    pyplot.show()
    
def compare_entropies(models, s=slice(None)):
    legend = []
    for m in models:
        dfs[m].readout_costs_mean_actor_entropy.astype('float32').dropna().loc[s].plot()
        legend += [m]
    pyplot.legend(legend)
    pyplot.title('Entropy')
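
A minimal end-to-end sketch of how these helpers are meant to be used together ('some_run/main.tar' is a placeholder path, not one of the experiments below):

# register the checkpoint's log and dataframe under the name 'some_run/main' ...
load_model('some_run/main.tar')
# ... then overlay its reward, critic-cost and actor-cost curves, skipping the first 10 iterations
compare_rewards_and_errors(['some_run/main'], s=slice(10, None))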



Actor pretraining


In [924]:
# load_model('ted1e/main.tar')
# load_model('ted1i/main.tar')
# load_model('ted1x/main.tar')
load_model('ted1y/main.tar')
load_model('ted1y/annealing.tar')


best_valid_train_cost 50.1516418457
best_valid_train_cost 51.035823822

Critic pretraining


In [212]:
# load_model('ted2/critic_pretraining.tar')
load_model('ted3a/critic_pretraining.tar')
load_model('ted3b/critic_pretraining.tar')
load_model('ted3c/critic_pretraining.tar')
# Without using actor states
load_model('ted3e/critic_pretraining.tar')
# With back-propagation through the right-hand side
load_model('ted3f/critic_pretraining.tar')
load_model('ted3k/critic_pretraining.tar')
# load_model('ted4k/critic_pretraining.tar')
# load_model('ted7/critic_pretraining.tar')
# load_model('ted8/critic_pretraining.tar')


mean_total_reward: 4.78993055556
mean_total_reward: 4.620625
mean_total_reward: 4.65677083333
mean_total_reward: 4.80107638889
mean_total_reward: 4.14878472222
mean_total_reward: 6.88083333333

In [116]:
# load_model('ted4/critic_pretraining.tar')
# load_model('ted4a/critic_pretraining.tar')
# load_model('ted4a1/critic_pretraining.tar')
# load_model('ted4b/critic_pretraining.tar')
# load_model('ted4c/critic_pretraining.tar')
# load_model('ted4d/critic_pretraining.tar')
# load_model('ted4e/critic_pretraining.tar')
# load_model('ted4f/critic_pretraining.tar')
# load_model('ted4g/critic_pretraining.tar')
# load_model('ted4h/critic_pretraining.tar')
# load_model('ted4k/critic_pretraining.tar')


mean_total_reward: 1.4319723977
mean_total_reward: 1.75060462952
mean_total_reward: 2.08893013
mean_total_reward: 1.64400524563
mean_total_reward: 1.26427258386
mean_total_reward: 1.60714054108
mean_total_reward: 1.57896031274
mean_total_reward: 1.18813334571
mean_total_reward: 1.56072860294
mean_total_reward: 2.15120315552
mean_total_reward: 4.94235483805

In [216]:
# load_model('ted9/critic_pretraining.tar')
# load_model('ted9k/critic_pretraining.tar')
load_model('ted9c/critic_pretraining.tar')
load_model('ted9l/critic_pretraining.tar')


mean_total_reward: 2.46866268582
mean_total_reward: 5.08094702827

In [134]:
load_model('ted6/critic_pretraining.tar')
load_model('ted6a/critic_pretraining.tar')
load_model('ted6b/critic_pretraining.tar')
load_model('ted6c/critic_pretraining.tar')
load_model('ted6f/critic_pretraining.tar')
load_model('ted6h/critic_pretraining.tar')
load_model('ted6g/critic_pretraining.tar')
load_model('ted6k/critic_pretraining.tar')


mean_total_reward: 5.16104166667
mean_total_reward: 4.93111111111
mean_total_reward: 5.16026785714
mean_total_reward: 4.93
mean_total_reward: 4.2103125
mean_total_reward: 5.33909722222
mean_total_reward: 5.24652777778
mean_total_reward: 7.56045138889

In [699]:
# load_model('ted11/critic_pretraining.tar')
# load_model('ted11a/critic_pretraining.tar')
# load_model('ted11b/critic_pretraining.tar')
# load_model('ted11c/critic_pretraining.tar')
# load_model('ted11d/critic_pretraining.tar')
# load_model('ted11e/critic_pretraining.tar')
# load_model('ted11f/critic_pretraining.tar')
# load_model('ted11g/critic_pretraining.tar')
# load_model('ted11h/critic_pretraining.tar')
load_model('ted11q/critic_pretraining.tar')
load_model('ted11r/critic_pretraining.tar')
load_model('ted11s/critic_pretraining.tar')
load_model('ted11t/critic_pretraining.tar')
load_model('ted11u/critic_pretraining.tar')


mean_total_reward: 3.12834231059
mean_total_reward: 3.12834231059
mean_total_reward: 3.53378486633
mean_total_reward: 3.43945036994
mean_total_reward: 3.12834231059

In [23]:
# The only run without DP
compare_rewards_and_errors(
    ['ted2/critic_pretraining'])



In [214]:
# Preliminary experiments with a convolutional encoder in the critic
compare_rewards_and_errors(
    [#'ted3/critic_pretraining',
     'ted3a/critic_pretraining',
     'ted3b/critic_pretraining',
     'ted3e/critic_pretraining',
     #'ted3f/critic_pretraining',
     ])



In [145]:
# Edit distance
compare_rewards_and_errors(
    ['ted6/critic_pretraining',
     'ted6a/critic_pretraining',
     'ted6c/critic_pretraining',
     'ted6d/critic_pretraining',
     'ted6e/critic_pretraining',
     'ted6f/critic_pretraining',
     'ted6g/critic_pretraining',
     'ted6h/critic_pretraining'
    ])



In [72]:
# Annealing
compare_rewards_and_errors(
    [#'ted3c/critic_pretraining',
     'ted6b/critic_pretraining'])



In [61]:
# BLEU
compare_rewards_and_errors(
    ['ted4/critic_pretraining',
     'ted4a/critic_pretraining',
     'ted4b/critic_pretraining',
     'ted4d/critic_pretraining',
     'ted4e/critic_pretraining',
     'ted4f/critic_pretraining',
     'ted4g/critic_pretraining',
     'ted4h/critic_pretraining',
     'ted4a1/critic_pretraining'])


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-61-edf38179a077> in <module>()
      9      'ted4g/critic_pretraining',
     10      'ted4h/critic_pretraining',
---> 11      'ted4a1/critic_pretraining'])

<ipython-input-4-e91bf3fda691> in compare_rewards_and_errors(models, s)
     32     legend = []
     33     for m in models:
---> 34         dfs[m].mean_total_reward.astype('float32').dropna().loc[s].plot()
     35         legend += ['train_' + m]
     36         if 'valid_mean_total_reward' in dfs[m]:

KeyError: 'ted4/critic_pretraining'
<matplotlib.figure.Figure at 0x7fc6ad092e10>

In [217]:
compare_rewards_and_errors(
    ['ted9c/critic_pretraining'])



In [218]:
compare_rewards_and_errors(
    ['ted9l/critic_pretraining'])



In [580]:
# Reward prediction
compare_rewards_and_errors(
    ['ted7/critic_pretraining',
     'ted8/critic_pretraining'])



In [144]:
# Softmax 2, edit distance
compare_rewards_and_errors(
    ['ted3k/critic_pretraining',
     'ted6k/critic_pretraining'])



In [117]:
# Softmax 2, BLEU
compare_rewards_and_errors(
    ['ted4k/critic_pretraining'])



In [700]:
compare_rewards_and_errors(
    [#'ted11/critic_pretraining',
     #'ted11a/critic_pretraining',
     #'ted11b/critic_pretraining',
     'ted11q/critic_pretraining',
     'ted11r/critic_pretraining',
     'ted11s/critic_pretraining',
     'ted11t/critic_pretraining',
     'ted11u/critic_pretraining',
    ],
)



In [183]:
compare_rewards_and_errors([
        'ted11/critic_pretraining',
        'ted11h/critic_pretraining',
    ])



In [185]:
compare_rewards_and_errors([
        'ted11/critic_pretraining',
        'ted11e/critic_pretraining',
    ])



Main


In [942]:
# load_model('ted11u7/main.tar')
# load_model('ted11u8/main.tar')
# load_model('ted14/main.tar')
# load_model('ted14a/main.tar')
load_model('ted15/main.tar')
load_model('ted15b/main.tar')
# load_model('ted15a/main.tar')


best_valid_mean_total_reward 5.12157344818
mean_total_reward: 6.73055267334
best_valid_mean_total_reward 5.06236410141
mean_total_reward: 6.10983445909

In [75]:
compare_rewards_and_errors(
    ['ted2/main', 
     #'ted3/main', 
     'ted3a/main',
     'ted3b/main',
     'ted3e/main',
     'ted6a/main'
    ], s=slice(10, 10000))



In [41]:
compare_per(
    [#'ted2/main', 
     #'ted3/main', 
     #'ted3a/main',
     'ted3b/main',
     'ted3e/main',
     'ted3k/main'
    ])



In [29]:
compare_entropies(
    [
        'ted4h1/main',
        'ted9a/main',
        'ted9a1/main',
        'ted9/main',
        'ted9k/main'
    ])



In [12]:
compare_rewards_and_errors(
    ['ted4/main', 'ted4a/main', 'ted4k1/main', 'ted4h1/main', 'ted4a1/main'])


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-43565c6f4d2d> in <module>()
      1 compare_rewards_and_errors(
----> 2     ['ted4/main', 'ted4a/main', 'ted4k1/main', 'ted4h1/main', 'ted4a1/main'])

<ipython-input-4-e91bf3fda691> in compare_rewards_and_errors(models, s)
     32     legend = []
     33     for m in models:
---> 34         dfs[m].mean_total_reward.astype('float32').dropna().loc[s].plot()
     35         legend += ['train_' + m]
     36         if 'valid_mean_total_reward' in dfs[m]:

KeyError: 'ted4/main'
<matplotlib.figure.Figure at 0x7fc714f51090>

In [582]:
compare_rewards_and_errors(
    ['ted7/main', 'ted8/main'])



In [36]:
compare_rewards_and_errors(
    ['ted3k/main'])



In [223]:
compare_rewards_and_errors([
        'ted9c/main',
        'ted9l/main',
    ])



In [160]:
compare_rewards_and_errors([
        'ted11/main',
        'ted111/main',
        'ted11a/main',
        'ted11b/main',
        'ted11d/main',
        'ted11e/main',
        'ted11f/main'])



In [220]:
compare_rewards_and_errors([
        'ted11i/main',
        'ted11j/main',
        'ted11k/main',
        'ted11l/main',
        'ted11n/main',
        'ted11m/main',
    ])



In [929]:
compare_rewards_and_errors([
    'ted11u1/main',
    #'ted11u6/main',
    #'ted11u7/main',
    #'ted11u8/main',
    'ted14/main',
    'ted14a/main'
    ],
    s=slice(100000))



In [878]:
compare_rewards_and_errors([
    'ted11u1/main',
    'ted11u5a/main',
    'ted11u5b/main',
    'ted11u5c/main',
    'ted11u8/main',
    'ted11u7/main',
    ],
    s=slice(50000))



In [939]:
dfs['ted15b/main'].valid_per.dropna()


Out[939]:
0        19.631764
4792     20.312286
9584     20.896858
14376    20.941597
19168    21.393073
23960    21.647881
28752    21.450895
33544    21.663690
38336    21.990562
43128    21.902035
47920    21.912130
52712    22.080272
57504    22.061219
Name: valid_per, dtype: float64

In [943]:
compare_rewards_and_errors([
    'ted15/main',
    'ted15b/main'
    ])



In [944]:
compare_per(
    ['ted15/main', 'ted15b/main'])



In [928]:
compare_entropies([
        'ted11u1/main',
        'ted14/main',
    ])



In [916]:
compare_gradient_norms(
        ['ted14/main',
         'ted14a/main'])


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-916-977ea20da0f2> in <module>()
      1 compare_gradient_norms(
      2         ['ted14/main',
----> 3          'ted14a/main'])

<ipython-input-855-f672338245ac> in compare_gradient_norms(models)
     75     legend = []
     76     for m in models:
---> 77         dfs[m].total_gradient_norm.astype('float32').dropna().plot(use_index=False)
     78         legend += [m]
     79     pyplot.legend(legend)

/u/bahdanau/.local/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   2358                 return self[name]
   2359             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2360                                  (type(self).__name__, name))
   2361 
   2362     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'total_gradient_norm'

Analysis


In [1048]:
import re
from StringIO import StringIO

def tex_escape(text):
    conv = {
        '%': r'\%',
        #'$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '~': r'\textasciitilde{}',
        '&': r'\&',
        '^': r'\^{}',
        '<s>': r' BOS ',
        '</s>': r' EOS ',
    }
    regex = re.compile('|'.join(re.escape(unicode(key)) 
                       for key in sorted(conv.keys(), key=lambda item: - len(item))))
    return regex.sub(lambda match: conv[match.group()], text)

def print_critic_suggestions(it, i, just_from_groundtruth=False, p_threshold=0.0):
    result = StringIO()
    
    prediction_words = predictions[it][i].split()    
    groundtruth_words = groundtruth[it][i].split()
    
    print >>result, r"$\begin{array}{cc}"
    print >>result, r"\mathbf{{Groundtruth}} & \textrm{{ {} }}\\".format(tex_escape(" ".join(groundtruth_words[:groundtruth_words.index('</s>') + 1])))
    print >>result, r"\mathbf{{Prediction}} & \textrm{{ {} }}".format(tex_escape(" ".join(prediction_words[:prediction_words.index('</s>') + 1])))
    print >>result, r"\end{array}$"
    print >>result
    
    groundtruth_nums = set([word2num[word] for word in groundtruth[it][i].split()])
    
    print >>result, "$\\begin{array}{ccccc}"
    print >>result, r"\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Q} & \textrm{Best Q} \\"
    for step in range(len(prediction_words)):
        actions = enumerate(values[it][step, i])
        if just_from_groundtruth:
            actions = [(n, q) for n, q in actions if n in groundtruth_nums]
        actions = [(n, q) for (n, q) in actions if probs[it][step, i, n] > p_threshold]
        best = sorted(actions, key=lambda (n, q): -q)[:5]
        print >>result, "{} & {:.6f} & {:.6f} & {:.6f} &".format(
            tex_escape(prediction_words[step]),
            rewards[it][step, i],
            probs[it][step, i, word2num[prediction_words[step]]],
            values[it][step, i, word2num[prediction_words[step]]],
        )
        print >>result, "\,".join([
            "{}({:.6f}, {:.6f})".format(tex_escape(num2word[c]), o, probs[it][step, i , c]) 
            for c, o in best]),
        print >>result, '\\\\'
        if prediction_words[step] == '</s>':
            break
    print >>result, "\\end{array}$"
    print >>result, r"\newpage"
    result.seek(0)
    return result.read()

def print_critic_suggestions2(it, i, just_from_groundtruth=False, p_threshold=0.0):
    result = StringIO()
    
    prediction_words = predictions[it][i].split()    
    groundtruth_words = groundtruth[it][i].split()
    
    print >>result, r"$\begin{array}{cc}"
    print >>result, r"\mathbf{{Groundtruth}} & \textrm{{ {} }}\\".format(tex_escape(" ".join(groundtruth_words[:groundtruth_words.index('</s>') + 1])))
    print >>result, r"\mathbf{{Prediction}} & \textrm{{ {} }}".format(tex_escape(" ".join(prediction_words[:prediction_words.index('</s>') + 1])))
    print >>result, r"\end{array}$"
    print >>result
    
    groundtruth_nums = set([word2num[word] for word in groundtruth[it][i].split()])
    
    print >>result, "\\begin{tabular}{cc}"
    # print >>result, r"\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Q} & \textrm{Best Q} \\"
    print >>result, r"\textrm{Word} & \textrm{Best Q} \\"
    for step in range(len(prediction_words)):
        actions = enumerate(values[it][step, i])
        if just_from_groundtruth:
            actions = [(n, q) for n, q in actions if n in groundtruth_nums]
        actions = [(n, q) for (n, q) in actions if probs[it][step, i, n] > p_threshold]
        best = sorted(actions, key=lambda (n, q): -q)[:3]
        print >>result, "{} &".format(tex_escape(prediction_words[step]))
        print >>result, " ".join([
            "{}({:.3f})".format(tex_escape(num2word[c]), o) 
            for c, o in best]),
        print >>result, '\\\\'
        if prediction_words[step] == '</s>':
            break
    print >>result, "\\end{tabular}"
    result.seek(0)
    return result.read()

In [23]:
ted = H5PyMTDataset('targets', file_or_path='/data/lisatmp4/bahdanau/data/TED/de-en/ted.h5', which_sets=('train',))
num2word = ted.num2word
word2num = ted.word2num
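
`num2word` and `word2num` are the vocabulary lookup tables used by the printing helpers above; a quick sanity check (assuming '</s>', the end-of-sentence marker used throughout, is in the vocabulary):

# the two mappings should be inverses of each other
assert num2word[word2num['</s>']] == '</s>'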

In [980]:
load_model('ted15/main_verbose.tar')


best_valid_mean_total_reward 5.0852856636
mean_total_reward: 6.79454310735

In [982]:
log = logs['ted15/main_verbose']
print log.status['iterations_done']
train_cost = [log[t].get('train_cost') for t in range(0, log.status['iterations_done'] + 1)]
rewards = [log[t].get('readout_costs_rewards') for t in range(0, log.status['iterations_done'] + 1)]
mean_reward = [log[t].get('mean_reward') for t in range(0, log.status['iterations_done'] + 1)]
critic_cost = [log[t].get('readout_costs_mean_critic_cost') for t in range(0, log.status['iterations_done'] + 1)]
actor_cost = [log[t].get('readout_costs_mean_actor_cost') for t in range(0, log.status['iterations_done'] + 1)]

inputs = [log[t].get('average_inputs') for t in range(0, log.status['iterations_done'] + 1)]
predictions = [log[t].get('average_predictions') for t in range(0, log.status['iterations_done'] + 1)]
prediction_masks = [log[t].get('readout_costs_prediction_mask') for t in range(0, log.status['iterations_done'] + 1)]
groundtruth = [log[t].get('average_groundtruth') for t in range(0, log.status['iterations_done'] + 1)]

value_biases = [log[t].get('readout_costs_value_biases') for t in range(0, log.status['iterations_done'] + 1)]
values = [log[t].get('readout_costs_values') for t in range(0, log.status['iterations_done'] + 1)]
probs = [log[t].get('readout_costs_probs') for t in range(0, log.status['iterations_done'] + 1)]
outputs = [log[t].get('readout_costs_outputs') for t in range(0, log.status['iterations_done'] + 1)]

prediction_values = [log[t].get('readout_costs_prediction_values') for t in range(0, log.status['iterations_done'] + 1)]
prediction_outputs = [log[t].get('readout_costs_prediction_outputs') for t in range(0, log.status['iterations_done'] + 1)]

value_targets = [log[t].get('readout_costs_value_targets') for t in range(0, log.status['iterations_done'] + 1)]


3

In [1043]:
from IPython.display import Latex
numpy.set_printoptions(precision=4, suppress=True)
it = 2
i = 9
tex = print_critic_suggestions2(it, i, p_threshold=0.0000)
# print tex
Latex(tex)


Out[1043]:
$\begin{array}{cc} \mathbf{Groundtruth} & \textrm{ BOS and there \'s one that i want to talk about . EOS }\\ \mathbf{Prediction} & \textrm{ BOS one of them i want to tell you about here . EOS } \end{array}$ \begin{tabular}{ccccc} \textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Q} & \textrm{Best Q} \\ BOS & 0.000 & 1.000 & 3.330 & ,(3.582, 0.000) and(3.529, 0.000) \'m(3.529, 0.000) \\ one & 0.001 & 0.930 & 5.403 & and(6.623, 0.007) there(6.200, 0.000) but(5.967, 0.000) \\ of & 0.109 & 0.999 & 4.796 & that(6.197, 0.000) one(5.668, 0.000) \'s(5.467, 0.000) \\ them & 0.376 & 0.999 & 4.637 & that(5.408, 0.000) one(5.118, 0.000) i(5.002, 0.000) \\ i & 0.413 & 0.845 & 4.629 & that(4.796, 0.000) i(4.629, 0.845) ,(4.139, 0.109) \\ want & 0.613 & 0.999 & 5.008 & want(5.008, 0.999) i(4.160, 0.000) \'t(3.361, 0.000) \\ to & 0.944 & 1.000 & 4.729 & to(4.729, 1.000) want(3.497, 0.000) going(3.396, 0.000) \\ tell & 0.228 & 0.155 & 1.185 & talk(3.717, 0.763) you(2.407, 0.000) to(2.133, 0.000) \\ you & 0.136 & 0.993 & 0.729 & about(1.209, 0.001) that(0.989, 0.000) talk(0.924, 0.000) \\ about & 0.206 & 0.895 & 0.706 & about(0.706, 0.895) .(0.660, 0.025) right(0.653, 0.000) \\ here & 0.028 & 0.728 & 0.149 & .(0.498, 0.252) ?(0.291, 0.000) --(0.285, 0.000) \\ . & 0.116 & 1.000 & 0.195 & .(0.195, 1.000) there(0.175, 0.000) know(0.087, 0.000) \\ EOS & 0.000 & 1.000 & -0.093 & .(0.168, 0.000) EOS (-0.093, 1.000) ?(-0.173, 0.000) \\ \end{tabular}$ \newpage

In [1049]:
print print_critic_suggestions2(it, i, p_threshold=0.0000)


$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  and there \&apos;s one that i want to talk about .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  one of them i want to tell you about here .  EOS  }
\end{array}$

\begin{tabular}{cc}
\textrm{Word} & \textrm{Best Q} \\
 BOS  &
,(3.582) and(3.529) \&apos;m(3.529) \\
one &
and(6.623) there(6.200) but(5.967) \\
of &
that(6.197) one(5.668) \&apos;s(5.467) \\
them &
that(5.408) one(5.118) i(5.002) \\
i &
that(4.796) i(4.629) ,(4.139) \\
want &
want(5.008) i(4.160) \&apos;t(3.361) \\
to &
to(4.729) want(3.497) going(3.396) \\
tell &
talk(3.717) you(2.407) to(2.133) \\
you &
about(1.209) that(0.989) talk(0.924) \\
about &
about(0.706) .(0.660) right(0.653) \\
here &
.(0.498) ?(0.291) --(0.285) \\
. &
.(0.195) there(0.175) know(0.087) \\
 EOS  &
.(0.168)  EOS (-0.093) ?(-0.173) \\
\end{tabular}


In [646]:
for i in range(0, 10):
    print print_critic_suggestions(it, i, p_threshold=0.001)


$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  that \&apos;s progress : when we prove things wrong .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  isn \&apos;t the progress : if we prove that \&apos;s really wrong .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
isn & 0.001 & 0.001 &
that(3.984, 0.157)\,there(3.163, 0.003)\,of(3.135, 0.002)\,let(3.114, 0.003)\,it(3.103, 0.082) \\
\&apos;t & 0.139 & 0.982 &
that(2.735, 0.006)\,this(2.422, 0.004)\,the(2.402, 0.002)\,\&apos;t(2.356, 0.982)\,about(2.251, 0.003) \\
the & 0.385 & 0.025 &
sharing(2.666, 0.001)\,progress(2.411, 0.108)\,possible(2.231, 0.003)\,when(2.178, 0.001)\,that(2.144, 0.288) \\
progress & 0.436 & 0.552 &
difference(1.713, 0.005)\,concept(1.694, 0.007)\,health(1.673, 0.004)\,program(1.624, 0.002)\,progress(1.607, 0.552) \\
: & 0.624 & 0.843 &
:(1.777, 0.843)\,when(1.440, 0.004)\,if(1.338, 0.004)\,--(1.216, 0.006)\,happens(1.066, 0.002) \\
if & 0.205 & 0.368 &
when(2.476, 0.211)\,whenever(1.253, 0.003)\,where(1.100, 0.002)\,especially(1.059, 0.003)\,if(1.059, 0.368) \\
we & 0.262 & 0.791 &
we(0.700, 0.791)\,prove(0.654, 0.005)\,if(0.587, 0.003)\,that(0.524, 0.010)\,case(0.494, 0.008) \\
prove & 0.427 & 0.657 &
prove(0.529, 0.657)\,realized(0.493, 0.004)\,found(0.482, 0.009)\,learned(0.475, 0.007)\,emphasize(0.346, 0.003) \\
that & 0.145 & 0.482 &
things(1.568, 0.030)\,wrong(0.889, 0.002)\,something(0.426, 0.090)\,evidence(0.333, 0.007)\,that(0.148, 0.482) \\
\&apos;s & 0.301 & 0.103 &
.(0.182, 0.003)\,wrong(0.088, 0.009)\,coke(-0.025, 0.001)\,\&apos;s(-0.079, 0.103)\,things(-0.132, 0.071) \\
really & -0.282 & 0.000 &
wrong(-0.182, 0.363)\,somewhat(-0.276, 0.003)\,things(-0.290, 0.001)\,happening(-0.346, 0.001)\,that(-0.400, 0.004) \\
wrong & -0.150 & 0.789 &
surprising(-0.052, 0.001)\,wrong(-0.374, 0.789)\,something(-0.469, 0.030)\,fake(-0.537, 0.002)\,a(-0.585, 0.008) \\
. & 0.003 & 0.983 &
.(0.004, 0.983)\,?(-0.445, 0.006)\,there(-0.464, 0.001)\,with(-0.484, 0.005) \\
 EOS  & 0.000 & 1.000 &
 EOS (0.003, 1.000) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  my home would have to be whatever i carried around inside me .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  wish , back in my home , and in me , whatever i \&apos;m doing , right ?  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
wish & 0.000 & 0.000 &
my(2.759, 0.463)\,of(2.641, 0.001)\,and(2.444, 0.096)\,thankfully(2.424, 0.002)\,oh(2.407, 0.003) \\
, & 0.039 & 0.569 &
home(2.368, 0.012)\,he(2.249, 0.002)\,went(2.213, 0.001)\,all(2.195, 0.003)\,my(2.133, 0.095) \\
back & 0.203 & 0.009 &
he(2.133, 0.001)\,my(2.064, 0.718)\,since(2.036, 0.003)\,home(2.027, 0.013)\,it(1.996, 0.012) \\
in & 0.236 & 0.093 &
it(1.750, 0.001)\,is(1.745, 0.001)\,there(1.738, 0.004)\,with(1.726, 0.004)\,to(1.721, 0.136) \\
my & 0.349 & 0.389 &
whatever(1.514, 0.026)\,india(1.444, 0.001)\,there(1.336, 0.001)\,our(1.329, 0.024)\,this(1.305, 0.004) \\
home & 0.531 & 0.785 &
head(1.179, 0.001)\,home(1.023, 0.785)\,age(0.942, 0.001)\,place(0.937, 0.005)\,office(0.925, 0.001) \\
, & 0.198 & 0.977 &
whatever(0.818, 0.002)\,that(0.721, 0.003)\,is(0.578, 0.001)\,and(0.361, 0.005)\,was(0.309, 0.001) \\
and & 0.138 & 0.054 &
whatever(0.741, 0.459)\,anyway(0.507, 0.005)\,have(0.502, 0.001)\,everything(0.472, 0.001)\,it(0.466, 0.005) \\
in & 0.092 & 0.002 &
whatever(0.324, 0.116)\,it(0.259, 0.011)\,anyway(0.241, 0.004)\,my(0.229, 0.003)\,all(0.210, 0.004) \\
me & 0.195 & 0.206 &
argentina(0.466, 0.001)\,it(0.171, 0.003)\,\&apos;(0.169, 0.003)\,any(0.131, 0.001)\,me(0.089, 0.206) \\
, & 0.034 & 0.677 &
.(0.349, 0.003)\,whatever(0.181, 0.003)\,it(0.033, 0.003)\,there(-0.012, 0.122)\,all(-0.050, 0.002) \\
whatever & 0.130 & 0.304 &
whatever(-0.043, 0.304)\,anyway(-0.187, 0.003)\,everything(-0.245, 0.001)\,it(-0.259, 0.008)\,since(-0.286, 0.004) \\
i & 0.337 & 0.917 &
.(-0.022, 0.005)\,it(-0.137, 0.021)\,else(-0.229, 0.003)\,there(-0.278, 0.003)\,he(-0.328, 0.002) \\
\&apos;m & -0.185 & 0.201 &
gave(-0.253, 0.001)\,went(-0.254, 0.002)\,go(-0.267, 0.002)\,give(-0.271, 0.002)\,want(-0.285, 0.001) \\
doing & -0.159 & 0.041 &
involved(-0.024, 0.001)\,around(-0.084, 0.002)\,wearing(-0.110, 0.018)\,down(-0.187, 0.002)\,walking(-0.210, 0.002) \\
, & -0.139 & 0.015 &
.(-0.078, 0.022)\,?(-0.192, 0.002)\,there(-0.194, 0.007)\,it(-0.200, 0.009)\,them(-0.212, 0.001) \\
right & -0.122 & 0.145 &
anyway(-0.175, 0.016)\,including(-0.191, 0.001)\,ok(-0.210, 0.002)\,it(-0.212, 0.013)\,whatever(-0.270, 0.002) \\
? & -0.108 & 0.563 &
anyway(0.061, 0.001)\,.(0.038, 0.047)\,it(-0.001, 0.002)\,around(-0.043, 0.006)\,there(-0.044, 0.076) \\
 EOS  & 0.000 & 0.997 &
 EOS (0.001, 0.997)\,i(-0.487, 0.001) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  their relationship is a love story that feeds the earth .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  more is a forecast who entered the earth . the camp .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
more & 0.000 & 0.004 &
relationship(3.897, 0.002)\,their(3.874, 0.322)\,of(3.393, 0.001)\,this(3.271, 0.022)\,a(3.259, 0.013) \\
is & 0.109 & 0.388 &
relationship(3.857, 0.160)\,importantly(3.450, 0.117)\,is(3.285, 0.388)\,their(3.120, 0.004)\,openness(3.087, 0.013) \\
a & 0.529 & 0.633 &
a(3.405, 0.633)\,is(3.110, 0.002)\,this(2.700, 0.004)\,with(2.606, 0.001)\,kind(2.604, 0.001) \\
forecast & 0.430 & 0.006 &
love(3.063, 0.004)\,library(2.862, 0.002)\,feminine(2.854, 0.001)\,relationship(2.762, 0.234)\,trillion(2.561, 0.002) \\
who & 0.339 & 0.002 &
is(1.811, 0.003)\,relationship(1.748, 0.003)\,journey(1.709, 0.002)\,survey(1.708, 0.001)\,that(1.686, 0.106) \\
entered & 0.236 & 0.002 &
feeds(1.537, 0.004)\,envision(1.329, 0.001)\,perceives(1.313, 0.009)\,believes(1.275, 0.002)\,founded(1.267, 0.004) \\
the & 0.286 & 0.630 &
the(0.906, 0.630)\,that(0.758, 0.001)\,this(0.689, 0.004)\,earth(0.628, 0.355)\,a(0.624, 0.002) \\
earth & 0.443 & 0.936 &
earth(0.804, 0.936)\,ground(0.372, 0.002)\,world(0.309, 0.036)\,way(0.270, 0.001)\,globe(0.218, 0.001) \\
. & 0.881 & 0.672 &
.(0.553, 0.672)\,that(0.466, 0.040)\,with(0.368, 0.013)\,earth(0.350, 0.001)\,this(0.260, 0.002) \\
the & 0.030 & 0.012 &
\&quot;(0.077, 0.004)\, EOS (-0.003, 0.958)\,the(-0.104, 0.012)\,this(-0.217, 0.004)\,right(-0.218, 0.002) \\
camp & -0.004 & 0.000 &
earth(0.104, 0.798)\,gap(0.030, 0.001)\,u.s.(-0.161, 0.003)\,soil(-0.203, 0.006)\,ground(-0.245, 0.002) \\
. & -0.288 & 0.938 &
 EOS (-0.002, 0.001)\,earth(-0.091, 0.002)\,here(-0.298, 0.002)\,.(-0.325, 0.938)\,is(-0.443, 0.013) \\
 EOS  & 0.000 & 0.999 &
 EOS (-0.001, 0.999) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  i took this video in cape town last year .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  i took the last video last year in l.a. .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
i & 0.001 & 0.616 &
and(5.019, 0.145)\,i(4.886, 0.616)\,but(4.602, 0.006)\,so(4.447, 0.068)\,because(4.374, 0.002) \\
took & 0.218 & 0.880 &
took(6.204, 0.880)\,started(4.730, 0.001)\,saw(4.714, 0.003)\,showed(4.351, 0.007)\,built(4.246, 0.002) \\
the & 0.603 & 0.025 &
this(5.624, 0.912)\,video(3.523, 0.004)\,that(3.468, 0.033)\,it(3.261, 0.002)\,a(3.200, 0.007) \\
last & 0.538 & 0.570 &
video(2.320, 0.367)\,final(2.298, 0.008)\,following(2.283, 0.002)\,last(2.256, 0.570)\,latest(2.170, 0.003) \\
video & 0.442 & 0.654 &
last(1.814, 0.016)\,video(1.683, 0.654)\,year(1.587, 0.219)\,clip(1.540, 0.012)\,shot(1.538, 0.001) \\
last & 0.233 & 0.274 &
in(1.597, 0.178)\,installed(1.346, 0.002)\,shot(1.286, 0.015)\,last(1.114, 0.274)\,laboratory(1.090, 0.001) \\
year & 0.480 & 0.988 &
year(1.145, 0.988)\,video(0.894, 0.001)\,week(0.749, 0.004) \\
in & 0.183 & 0.725 &
.(0.935, 0.035)\,in(0.381, 0.725)\,for(0.375, 0.010)\,we(0.233, 0.002)\,with(0.140, 0.003) \\
l.a. & 0.030 & 0.070 &
town(0.626, 0.018)\,offices(0.452, 0.001)\,downtown(0.448, 0.003)\,afghanistan(0.427, 0.007)\,hospitals(0.409, 0.001) \\
. & 0.087 & 0.197 &
spills(0.164, 0.001)\,haiti(0.094, 0.001)\,.(0.034, 0.197)\, EOS (-0.001, 0.297)\,last(-0.064, 0.005) \\
 EOS  & 0.000 & 0.999 &
 EOS (-0.000, 0.999) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  this animation from my friends at proton studios shows looking at the big bang from the outside .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  we touched this from of friends .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
we & 0.000 & 0.008 &
this(3.002, 0.395)\,of(2.867, 0.002)\,from(2.851, 0.009)\,at(2.835, 0.002)\,and(2.835, 0.090) \\
touched & 0.004 & 0.002 &
reuse(3.052, 0.003)\,pump(2.981, 0.001)\,converted(2.954, 0.002)\,from(2.921, 0.004)\,learn(2.852, 0.005) \\
this & 0.068 & 0.125 &
at(2.795, 0.002)\,from(2.692, 0.387)\,by(2.661, 0.087)\,videos(2.661, 0.004)\,between(2.586, 0.001) \\
from & 0.168 & 0.399 &
series(2.724, 0.021)\,from(2.642, 0.399)\,email(2.635, 0.002)\,by(2.576, 0.337)\,talk(2.552, 0.001) \\
of & 0.209 & 0.008 &
mythology(3.058, 0.002)\,prehistoric(2.971, 0.001)\,tens(2.804, 0.007)\,email(2.535, 0.002)\,perspectives(2.491, 0.001) \\
friends & 0.262 & 0.582 &
meetings(2.437, 0.002)\,thousands(2.275, 0.007)\,from(2.244, 0.004)\,paintings(2.211, 0.005)\,millions(2.166, 0.002) \\
. & 0.259 & 0.035 &
at(2.532, 0.012)\,tracking(2.277, 0.001)\,looking(2.264, 0.006)\,by(2.035, 0.003)\,between(2.020, 0.002) \\
 EOS  & 0.000 & 0.221 &
at(2.011, 0.008)\,slices(1.987, 0.002)\,looking(1.979, 0.001)\,maths(1.922, 0.001)\,by(1.794, 0.003) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  1900s : maybe one percent .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  there may an land that is perhaps one percent .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
there & 0.045 & 0.008 &
or(2.710, 0.002)\,one(2.690, 0.003)\,UNK(2.670, 0.053)\,a(2.651, 0.005)\,\&quot;(2.594, 0.002) \\
may & 0.646 & 0.192 &
maybe(2.689, 0.033)\,UNK(2.468, 0.002)\,was(2.347, 0.120)\,will(2.205, 0.005)\,has(2.201, 0.001) \\
an & 0.638 & 0.004 &
one(1.951, 0.002)\,maybe(1.707, 0.004)\,include(1.589, 0.018)\,add(1.310, 0.001)\,seem(1.305, 0.002) \\
land & 0.136 & 0.001 &
one(1.071, 0.002)\,maybe(0.877, 0.024)\,election(0.736, 0.049)\,47(0.733, 0.001)\,enormous(0.675, 0.003) \\
that & 0.038 & 0.181 &
fluke(0.617, 0.001)\,maybe(0.515, 0.118)\,mining(0.363, 0.003)\,percent(0.305, 0.008)\,breakdown(0.246, 0.001) \\
is & -0.018 & 0.048 &
one(0.867, 0.005)\,maybe(0.527, 0.209)\,requires(0.419, 0.001)\,percent(0.089, 0.004)\,live(0.069, 0.002) \\
perhaps & -0.236 & 0.366 &
maybe(0.004, 0.152)\,one(-0.013, 0.033)\,less(-0.129, 0.001)\,needed(-0.258, 0.003)\,like(-0.412, 0.006) \\
one & 0.033 & 0.349 &
one(0.582, 0.349)\,maybe(0.418, 0.003)\,less(0.165, 0.003)\,percent(0.148, 0.014)\,94(0.116, 0.004) \\
percent & 0.203 & 0.992 &
percent(0.669, 0.992)\,.(-0.075, 0.002) \\
. & 0.391 & 0.919 &
.(0.529, 0.919)\,percent(-0.016, 0.001)\,one(-0.168, 0.022)\,place(-0.200, 0.001)\,for(-0.357, 0.002) \\
 EOS  & 0.000 & 0.999 &
 EOS (0.003, 0.999) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  you need to know how to tell a story .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  well you have to know how to tell you .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
well & 0.001 & 0.002 &
you(5.061, 0.936)\,we(4.645, 0.012)\,and(4.297, 0.010)\,so(4.235, 0.005)\,i(4.197, 0.003) \\
you & 0.166 & 0.601 &
you(4.693, 0.601)\,we(4.353, 0.014)\,needless(4.303, 0.001)\,to(4.178, 0.005)\,they(4.114, 0.002) \\
have & 0.458 & 0.717 &
need(6.184, 0.174)\,really(3.863, 0.002)\,can(3.793, 0.001)\,know(3.757, 0.021)\,have(3.755, 0.717) \\
to & 0.439 & 0.997 &
to(3.424, 0.997) \\
know & 0.640 & 0.785 &
know(3.351, 0.785)\,need(2.562, 0.008)\,tell(2.488, 0.003)\,learn(1.974, 0.006)\,remember(1.897, 0.001) \\
how & 0.974 & 0.908 &
how(3.127, 0.908)\,where(1.608, 0.005)\,what(1.399, 0.020)\,--(1.300, 0.002)\,a(1.104, 0.006) \\
to & 1.551 & 0.843 &
to(3.753, 0.843)\,a(1.119, 0.065)\,we(1.039, 0.001)\,it(1.019, 0.026)\,many(1.015, 0.002) \\
tell & 1.475 & 0.924 &
tell(2.578, 0.924)\,a(1.208, 0.002)\,be(1.051, 0.002)\,give(0.700, 0.002)\,make(0.694, 0.012) \\
you & 0.064 & 0.420 &
a(2.580, 0.235)\,story(0.445, 0.002)\,your(0.435, 0.002)\,.(0.408, 0.129)\,them(0.399, 0.011) \\
. & 0.185 & 0.289 &
a(0.619, 0.646)\,.(0.001, 0.289)\,story(-0.303, 0.006)\,this(-0.638, 0.001)\,the(-0.789, 0.004) \\
 EOS  & 0.000 & 0.992 &
 EOS (-0.002, 0.992)\,story(-1.052, 0.005) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  but you can go beyond that .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  but you know it out beyond that .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
but & 0.022 & 0.885 &
you(2.808, 0.045)\,but(2.730, 0.885)\,and(2.706, 0.027)\,well(2.450, 0.002)\,can(2.303, 0.003) \\
you & 0.716 & 0.629 &
you(3.758, 0.629)\,if(2.673, 0.004)\,as(2.537, 0.001)\,we(2.456, 0.029)\,do(2.443, 0.001) \\
know & 0.939 & 0.002 &
can(3.423, 0.953)\,know(1.854, 0.002)\,\&apos;ll(1.842, 0.002)\,could(1.732, 0.006)\,may(1.711, 0.002) \\
it & 0.333 & 0.020 &
can(0.878, 0.004)\,beyond(0.811, 0.037)\,that(0.802, 0.084)\,nothing(0.750, 0.004)\,something(0.722, 0.004) \\
out & 0.157 & 0.014 &
go(0.584, 0.032)\,outside(0.531, 0.010)\,beyond(0.510, 0.133)\,further(0.380, 0.021)\,happens(0.370, 0.003) \\
beyond & 0.212 & 0.422 &
further(0.232, 0.004)\,.(0.224, 0.051)\,today(0.200, 0.002)\,that(0.172, 0.008)\,outside(0.124, 0.002) \\
that & 0.387 & 0.446 &
that(0.795, 0.446)\,scale(0.069, 0.004)\,.(0.003, 0.178)\,stuff(-0.004, 0.001)\,others(-0.013, 0.001) \\
. & 0.428 & 0.994 &
.(0.294, 0.994)\,?(-0.016, 0.001)\,,(-0.960, 0.002) \\
 EOS  & 0.000 & 1.000 &
 EOS (0.001, 1.000) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  and like , knock like a woodpecker on my door until i opened it up .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  and he UNK like a UNK brought to my closed until i gave it a new way .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
and & 0.000 & 0.948 &
and(2.696, 0.948)\,it(2.406, 0.003)\,he(2.397, 0.037)\,so(2.314, 0.002) \\
he & 0.012 & 0.795 &
like(3.024, 0.001)\,there(2.727, 0.001)\,by(2.589, 0.002)\,on(2.585, 0.001)\,when(2.489, 0.002) \\
UNK & 0.114 & 0.168 &
knocked(3.009, 0.002)\,showed(2.841, 0.002)\,feels(2.836, 0.008)\,felt(2.786, 0.004)\,said(2.782, 0.012) \\
like & 0.229 & 0.393 &
on(2.513, 0.016)\,like(2.326, 0.393)\,by(2.311, 0.001)\,there(2.274, 0.003)\,wire(2.259, 0.002) \\
a & 0.414 & 0.453 &
a(2.044, 0.453)\,swimming(1.965, 0.001)\,\&quot;(1.873, 0.006)\,it(1.864, 0.003)\,leaping(1.839, 0.002) \\
UNK & 0.292 & 0.175 &
slap(2.107, 0.001)\,lawn(2.098, 0.001)\,shop(2.046, 0.010)\,two-wheeler(2.036, 0.001)\,mercedes-benz(1.949, 0.002) \\
brought & 0.245 & 0.000 &
on(1.653, 0.008)\,climb(1.509, 0.001)\,door(1.492, 0.131)\,alley(1.466, 0.002)\,address(1.402, 0.001) \\
to & 0.195 & 0.387 &
on(1.215, 0.009)\,door(1.152, 0.003)\,by(1.079, 0.002)\,up(1.053, 0.064)\,mine(0.983, 0.006) \\
my & 0.244 & 0.972 &
her(0.652, 0.004)\,this(0.625, 0.001)\,my(0.614, 0.972)\,the(0.560, 0.007)\,me(0.487, 0.005) \\
closed & 0.117 & 0.000 &
throat(1.050, 0.001)\,until(0.832, 0.001)\,address(0.802, 0.001)\,door(0.792, 0.673)\,head(0.694, 0.017) \\
until & 0.175 & 0.397 &
until(0.485, 0.397)\,him(0.455, 0.002)\,door(0.448, 0.006)\,up(0.386, 0.003)\,when(0.357, 0.002) \\
i & 0.375 & 0.805 &
i(0.584, 0.805)\,we(0.380, 0.004)\,until(0.323, 0.003)\,his(0.282, 0.003)\,he(0.246, 0.026) \\
gave & 0.045 & 0.013 &
opened(0.857, 0.008)\,wished(0.500, 0.001)\,just(0.479, 0.005)\,meet(0.459, 0.011)\,knocked(0.432, 0.001) \\
it & 0.110 & 0.234 &
up(0.181, 0.024)\,.(0.087, 0.001)\,them(0.038, 0.024)\,him(0.032, 0.632)\,it(0.001, 0.234) \\
a & 0.011 & 0.072 &
up(0.474, 0.052)\,.(0.008, 0.023)\,very(0.003, 0.002)\,it(-0.010, 0.006)\,down(-0.035, 0.004) \\
new & -0.001 & 0.012 &
cd(-0.079, 0.001)\,piece(-0.091, 0.023)\,side(-0.094, 0.001)\,ruler(-0.096, 0.002)\,year(-0.104, 0.002) \\
way & -0.157 & 0.397 &
coat(0.222, 0.005)\,angle(-0.042, 0.002)\,break(-0.046, 0.004)\,.(-0.065, 0.014)\,cover(-0.077, 0.002) \\
. & -0.071 & 0.817 &
.(-0.043, 0.817)\,there(-0.176, 0.003)\,him(-0.220, 0.005)\,of(-0.265, 0.004)\,ago(-0.302, 0.001) \\
 EOS  & 0.000 & 0.997 &
 EOS (0.000, 0.997)\,\&quot;(-0.048, 0.002) \\
\end{array}$
\newpage

$\begin{array}{cc}
\mathbf{Groundtruth} & \textrm{  BOS  you don \&apos;t have to remember that part , but trust me .  EOS  }\\
\mathbf{Prediction} & \textrm{  BOS  that \&apos;s what you have to think , though , but there you can think .  EOS  }
\end{array}$

$\begin{array}{cccc}
\textrm{Word} & \textrm{Reward} & \textrm{Actor prob}. & \textrm{Critic best Q} \\
 BOS  & 0.000 & 1.000 &
 BOS (1.261, 1.000) \\
that & 0.000 & 0.052 &
you(4.612, 0.401)\,don(4.216, 0.010)\,if(3.903, 0.002)\,and(3.741, 0.055)\,we(3.738, 0.008) \\
\&apos;s & 0.046 & 0.558 &
don(4.072, 0.040)\,you(3.894, 0.076)\,they(3.251, 0.021)\,wouldn(3.144, 0.001)\,,(3.099, 0.021) \\
what & 0.241 & 0.102 &
don(3.906, 0.004)\,you(2.864, 0.047)\,they(2.689, 0.001)\,why(2.665, 0.005)\,still(2.568, 0.005) \\
you & 0.341 & 0.624 &
you(2.490, 0.624)\,we(2.346, 0.017)\,they(2.261, 0.256)\,i(2.186, 0.007)\,to(1.969, 0.008) \\
have & 0.355 & 0.130 &
don(3.559, 0.432)\,have(2.001, 0.130)\,didn(1.969, 0.005)\,can(1.922, 0.015)\,want(1.848, 0.006) \\
to & 0.560 & 0.910 &
to(1.820, 0.910)\,doesn(1.392, 0.005)\,you(1.307, 0.001)\,at(1.278, 0.001)\,,(1.223, 0.025) \\
think & 0.225 & 0.025 &
don(1.700, 0.007)\,remember(1.450, 0.708)\,have(1.404, 0.001)\,accept(1.354, 0.002)\,listen(1.342, 0.004) \\
, & 0.247 & 0.496 &
,(1.005, 0.496)\,:(0.942, 0.005)\,you(0.882, 0.043)\,we(0.825, 0.001)\,if(0.801, 0.005) \\
though & 0.109 & 0.129 &
don(0.835, 0.004)\,but(0.785, 0.702)\,\&quot;(0.692, 0.003)\,doesn(0.516, 0.001)\,you(0.504, 0.017) \\
, & 0.068 & 0.911 &
you(0.468, 0.030)\,it(0.270, 0.010)\,,(0.242, 0.911)\,they(0.233, 0.008)\,that(0.153, 0.001) \\
but & 0.372 & 0.179 &
trust(0.792, 0.054)\,but(0.445, 0.179)\,personally(0.384, 0.003)\,let(0.319, 0.012)\,see(0.306, 0.001) \\
there & 0.017 & 0.001 &
trust(1.637, 0.050)\,forgive(0.792, 0.011)\,please(0.629, 0.003)\,me(0.478, 0.002)\,interests(0.405, 0.001) \\
you & -0.002 & 0.476 &
me(0.104, 0.012)\,.(-0.052, 0.058)\,?(-0.082, 0.004)\,you(-0.117, 0.476)\,for(-0.164, 0.001) \\
can & -0.192 & 0.039 &
trust(0.183, 0.020)\,forgive(0.088, 0.005)\,don(0.002, 0.016)\,listen(-0.001, 0.002)\,ask(-0.057, 0.002) \\
think & -0.166 & 0.102 &
me(0.014, 0.068)\,trust(0.004, 0.017)\,join(-0.054, 0.003)\,ask(-0.098, 0.001)\,.(-0.124, 0.197) \\
. & -0.074 & 0.451 &
.(-0.067, 0.451)\,me(-0.089, 0.004)\,?(-0.165, 0.005)\,you(-0.212, 0.004)\,for(-0.229, 0.003) \\
 EOS  & 0.000 & 1.000 &
 EOS (0.002, 1.000) \\
\end{array}$
\newpage


In [367]:
best_mean_reward = numpy.array([
        g.split().index('</s>') - 1 for g in groundtruth[it]
    ]).mean()
print "Best mean reward:", best_mean_reward

# Cast to int: the mask sum is a float, and float slice indices are deprecated.
length = int(prediction_masks[it][:, i].sum())
print list(prediction_values[it][:, i][:length])
print list(value_targets[it][:, i][:length])
print list((probs[it][:, i] * values[it][:, i]).sum(axis=-1)[1:length + 1])


Best mean reward: 13.875
[0.85135376, 1.9749476, 1.5666727, 1.2047035, 0.95621145, 1.3350012, 1.8331257, 1.1212837, 1.0688899, 1.1804167, 0.73301333, 0.8139571, 1.2994342, 2.2184768, 1.5903366, 0.59918356, 0.51356542, 0.52722973, -0.059877075, 0.17668854, -0.00070162042]
[1.9171013, 1.5826744, 1.1915673, 1.0294173, 1.1446487, 1.6988121, 1.2918158, 0.97680783, 0.89685142, 0.81557077, 0.59529978, 0.92671192, 1.6988597, 5.848381, 0.86002684, 0.64811802, 0.66386282, 0.66233844, 0.24607919, 0.11978325, 0.0]
[1.9171017, 1.5826749, 1.1915678, 1.0294174, 1.1446489, 1.6988127, 1.2918155, 0.97680801, 0.89685178, 0.81557095, 0.59529978, 0.9267118, 1.698505, 3.045759, 0.70673901, 0.52282429, 0.48613217, 0.50686586, 0.18102823, -0.00061118929, 0.20152488]

In [203]:
costs = ((prediction_values[it] - value_targets[it]) ** 2 * prediction_masks[it])
print train_cost[it], costs[1:].sum(axis=0).mean()
costs_per_step = ((prediction_values[it] - value_targets[it]) ** 2 * prediction_masks[it]).mean(axis=1)
pyplot.plot(costs_per_step[1:])


2.09167051315 2.09167
Out[203]:
[<matplotlib.lines.Line2D at 0x7fc67d6d3950>]

In [455]:
for step in range(5):
    print values[it][step, i][:10]


[  3.93403697e+00   1.50448346e+00   2.96061393e-04   3.70079398e+00
   3.49806237e+00   3.70079398e+00   3.70079398e+00   3.70079398e+00
   3.70079398e+00   3.70079398e+00]
[ 3.09560323  0.73396677  0.00819693  2.519485    2.65885258  2.519485
  2.519485    2.519485    2.519485    2.519485  ]
[  2.59737992e+00   1.59238231e+00  -2.13861931e-03   2.04827094e+00
   2.23579121e+00   2.04827094e+00   2.04827094e+00   2.04827094e+00
   2.04827094e+00   2.04827094e+00]
[  2.18376374e+00   1.42976058e+00  -1.64488156e-04   1.78234935e+00
   2.30555010e+00   1.78234935e+00   1.78234935e+00   1.78234935e+00
   1.78234935e+00   1.78234935e+00]
[ 1.70555747  0.6297816   0.00672351  1.68172026  2.05962801  1.68172026
  1.68172026  1.68172026  1.68172026  1.68172026]

Debug


In [546]:
#load_model('ted3a_debug/critic_pretraining.tar')
# load_model('ted3a_debug2/critic_pretraining.tar')
# load_model('ted3a_debug4/critic_pretraining.tar')
# load_model('ted3a_debug5/critic_pretraining.tar')
# load_model('ted3a_debug6/critic_pretraining.tar')
# load_model('ted3a_debug7/critic_pretraining.tar')
# load_model('ted3a_debug8/critic_pretraining.tar')
# load_model('ted3a_debug9/critic_pretraining.tar')
# load_model('ted3a_debug10/critic_pretraining.tar')
# load_model('ted2_debug/critic_pretraining.tar')
# load_model('ted3_debug11/critic_pretraining.tar')
# load_model('ted3_debug12/critic_pretraining.tar')
# load_model('ted3_debug13/critic_pretraining.tar')
load_model('ted3_debug14/critic_pretraining.tar')


mean_total_reward: 5.00625

In [261]:
compare_rewards_and_errors(
    [#'ted3a_debug/critic_pretraining',
     'ted3a_debug2/critic_pretraining',
     'ted3a_debug4/critic_pretraining',
     'ted3a_debug5/critic_pretraining',
     'ted3a_debug6/critic_pretraining',   
    ]
)



In [492]:
compare_rewards_and_errors(
    ['ted3a_debug7/critic_pretraining',
     'ted3a_debug8/critic_pretraining',
     'ted3a_debug9/critic_pretraining',
    ]
)



In [509]:
compare_rewards_and_errors(
    ['ted2_debug/critic_pretraining'])



In [371]:
compare_rewards_and_errors(
    ['ted3a_debug10/critic_pretraining'])



In [538]:
compare_rewards_and_errors(
    ['ted3_debug11/critic_pretraining',
     'ted3_debug12/critic_pretraining',
     'ted3_debug13/critic_pretraining'])



In [547]:
compare_rewards_and_errors(
    ['ted3_debug14/critic_pretraining'])


Debug main


In [421]:
# load_model('ted3a_debug7/main.tar')
# load_model('ted3a_debug71/main.tar')
# load_model('ted3a_debug72/main.tar')
# load_model('ted3a_debug8/main.tar')
# load_model('ted3a_debug10/main.tar')
load_model('ted2_debug/main.tar')

In [423]:
compare_rewards_and_errors([
    'ted3a_debug7/main',
    'ted3a_debug71/main',
    'ted3a_debug72/main',        
    'ted3a_debug8/main',
    'ted2_debug/main'
    ])



In [286]:
compare_entropies(
    ['ted3a_debug7/main', 
     'ted3a_debug71/main',
     'ted3a_debug72/main'])



In [379]:
compare_rewards_and_errors(
    ['ted3a_debug10/main'])


BLEU


In [960]:
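# Note: in this MT setup the train_per / valid_per log columns appear to hold
# BLEU scores (hence the *_bleu variable names and the "BLEU" axis label below).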
ll_main_train_bleu = dfs['ted1y/main'].train_per.dropna()
ll_main_valid_bleu = dfs['ted1y/main'].valid_per.dropna()
ll_annealing_train_bleu = dfs['ted1y/annealing'].train_per.dropna()
ll_annealing_valid_bleu = dfs['ted1y/annealing'].valid_per.dropna()
ac_main_train_bleu = dfs['ted15/main'].train_per.dropna()
ac_main_valid_bleu = dfs['ted15/main'].valid_per.dropna()

In [1051]:
IPython.display.set_matplotlib_formats('pdf')

lw = 2
pyplot.figure(figsize=(6, 3))
pyplot.plot(range(0, len(ll_main_valid_bleu)), ll_main_valid_bleu, 'b', linewidth=lw)
pyplot.plot(range(8, len(ll_annealing_valid_bleu) + 8), ll_annealing_valid_bleu, 'g',linewidth=lw)
pyplot.plot(range(5, len(ac_main_valid_bleu) + 5), ac_main_valid_bleu, 'y', linewidth=lw)
pyplot.plot(range(0, len(ll_main_train_bleu)), ll_main_train_bleu, 'b--', linewidth=lw)
pyplot.plot(range(8, len(ll_annealing_train_bleu) + 8), ll_annealing_train_bleu, 'g--', linewidth=lw)
pyplot.plot(range(5, len(ac_main_train_bleu) + 5), ac_main_train_bleu, 'y--', linewidth=lw)

pyplot.legend(
    ['LL valid, $\\alpha=10^{-3}$',
     'LL valid, $\\alpha=10^{-4}$',
     'AC valid, $\\alpha=10^{-3}$',
     'LL train, $\\alpha=10^{-3}$',
     'LL train, $\\alpha=10^{-4}$',
     'AC train, $\\alpha=10^{-3}$'
     ], loc='best')
pyplot.xlim((0, 50))
pyplot.ylim((10, 40))
pyplot.xlabel("Epochs")
pyplot.ylabel("BLEU")


Out[1051]:
<matplotlib.text.Text at 0x7fc222f99650>
<matplotlib.figure.Figure at 0x7fc22727c510>

In [970]:
IPython.display.set_matplotlib_formats?

In [ ]: