This document contains the three simulations described in the paper.
In [1]:
    
# setup
%load_ext autoreload
%autoreload 2
import numpy as np
from numpy import *  # bare numpy names (array, zeros, where, divide, int8, ...) are used below
from random import *  # provides seed() and sample()
from dmww_classes import *
from sampling_helper import *
seed(1)  # fix the random seed for reproducibility
    
In [2]:
    
# define inference parameters used in all mutual exclusivity (ME) simulations
p = Params(n_samps=20,
           n_particles=1,
           alpha_r=.0001,
           alpha_nr=.001,
           empty_intent=.1,
           n_hypermoves=5)
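
As a rough gloss of the parameter names (our reading; the notebook does not define them): n_samps is the number of Gibbs samples, n_particles the number of particles for the particle-filter variant, alpha_r and alpha_nr concentration parameters for the referential and non-referential parts of the lexicon, empty_intent the prior probability that an utterance refers to nothing, and n_hypermoves the number of hyperparameter moves per sample.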
    
Create toy corpus.
In [35]:
    
# build world
w = World(n_words=5, n_objs=5)
w.show()
# build corpus
c = Corpus(world=w, n_sents=20, n_per_sent=1)
c.show()
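
To make the sentence representation concrete (a quick check, not in the original notebook), each entry of c.sents appears to pair an array of word indices with an array of object indices:
In [ ]:
    
# peek at the first sentence: [word indices, object indices]
print c.sents[0]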
    
    
Add a novel word to the world, then add an ME observation to the corpus.
In [36]:
    
w.update(1, labels=["fep"])  # add one novel word/object pair ("fep")
w.show()
c.sents.append([array([1, 5]), array([5])])  # ME observation: items 1 (familiar) and 5 (novel "fep") paired with novel item 5
c.update(w)
c.show()
    
    
Now, do inference with the ME observation in the corpus.
In [37]:
    
m = Lexicon(c, p,
            verbose=0,
            hyper_inf=True)
m.learn_lex_gibbs(c, p)
m.show()
[m.ref_score]  # display the ref score of the learned lexicon
    
    
In [39]:
    
m.plot_lex(w)
    
    
The novel word ("fep") gets mapped to the novel object (fep) in this small corpus.
Set up world and corpus, this time read from the corpus file.
In [40]:
    
corpusfile = 'corpora/corpus.csv'
w = World(corpus=corpusfile)
w.show()
c = Corpus(world=w, corpus=corpusfile)
    
    
Add a novel word and object to the world, then add the ME observation.
In [42]:
    
# word indices: BIRD = 22, FEP = 23; object index: fep = 420
w.update(1, labels=["fep"])  # add the novel word/object pair
w.show()
c.sents.append([array([22, 23]), array([420])])  # ME observation: BIRD (22) and FEP (23) with fep (420)
c.update(w)
    
    
Now, do inference with the ME observation.
In [44]:
    
l = Lexicon(c, p,
            verbose=0,
            hyper_inf=True);
l.learn_lex_gibbs(c, p);
    
    
Plot.
In [57]:
    
l.plot_lex(w, certainwords = 1)
    
    
Luce choice in the 2AFC ME test: does the novel word (FEP) or the known word (BIRD) map to the novel object (fep)?
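The choice probability follows the Luce choice rule, p(correct) = correct / (correct + incorrect), where each term is the corresponding entry of the learned referential matrix l.ref.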
In [65]:
    
known_word = l.ref[22, 8] #BIRD, bigbird
correct = l.ref[23, 420] #FEP, fep
incorrect = l.ref[22, 420] #BIRD, fep
lc = divide(correct, correct + incorrect)
print 'Known word, correct object: ' + str(known_word)
print 'Novel word, correct object: ' + str(correct)
print 'Novel word, incorrect object: ' + str(incorrect)
print 'Novel word, Luce Choice: ' + str(lc)
    
    
The novel word gets mapped to the novel object.
Set up world and corpus.
In [45]:
    
corpusfile = 'corpora/corpus.csv'
w = World(corpus=corpusfile)
w.show()
c = Corpus(world=w,corpus=corpusfile)
    
    
Add TWO novel words and novel objects, and add ME observations.
In [46]:
    
# FEP = 23
# fep = 420
# TOMA = 24
# toma = 421
w.update(2, labels = ["fep", "toma"])
w.show()
c.sents.append([array([23]),array([420])]) # novel training
c.sents.append([array([23,24]),array([421])]) # ME test
c.update(w)
    
    
Now, do inference with the Gibbs sampler.
In [47]:
    
l = Lexicon(c, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c, p);
    
    
Plot.
In [48]:
    
l.plot_lex(w, certainwords = 1)
    
    
Luce choice in the 2AFC novel-novel task.
In [49]:
    
correct = l.ref[24, 421]
incorrect = l.ref[23, 421]
lc = divide(correct, correct + incorrect)
print 'Novel word, correct object: ' + str(correct)
print 'Novel word, incorrect object: ' + str(incorrect)
print 'Novel word, Luce Choice: ' + str(lc)
    
    
At the ME test, the second novel object (toma) is correctly mapped to the second novel word (TOMA).
Make the corpus (here, the 3x3 condition: 18 words, each occurring 6 times, presented in trials of 3 word-object pairs). The properties of this training corpus are described in the paper.
In [13]:
    
# set parameters of the training corpus
condition = 3                    # words/objects per trial (the 3x3 condition)
num_words = 18
num_occurrences = 6              # each word occurs 6 times over training
total_words = num_words * num_occurrences
num_trials = total_words / condition
items = np.ones(num_words) * num_occurrences

# make empty corpus
yucorpus = list()
for i in range(num_trials):
    yucorpus.append([np.zeros(condition, dtype=int8),
                     np.zeros(condition, dtype=int8)])

# generate the actual corpus, sampling words in proportion to their
# remaining counts
try:
    for i in range(num_trials):  # for each trial
        for j in range(condition):  # for each word/object pairing in the trial
            item_probs = np.divide(items, total_words)
            yucorpus[i][0][j] = where(np.random.multinomial(1, item_probs) == 1)[0][0]

            # resample to avoid repeats within a trial
            tries = 1
            while sum(yucorpus[i][0][j] == yucorpus[i][0]) > 1:
                yucorpus[i][0][j] = where(np.random.multinomial(1, item_probs) == 1)[0][0]
                tries = tries + 1
                if tries > 1000:
                    break

            # objects are paired deterministically with their words
            yucorpus[i][1][j] = yucorpus[i][0][j]

            # decrement the item counts
            items[yucorpus[i][0][j]] = items[yucorpus[i][0][j]] - 1
            total_words = total_words - 1
except ValueError:
    print 'failed to generate corpus, run again!'
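
As a quick sanity check (not part of the original analysis), we can confirm the generated corpus has the intended properties: each word occurs num_occurrences times, and no word repeats within a trial.
In [ ]:
    
# sanity check: word frequencies and within-trial uniqueness
all_words = np.concatenate([trial[0] for trial in yucorpus])
print 'occurrences per word: ' + str(np.bincount(all_words, minlength=num_words))
print 'no repeats within trials: ' + str(all([len(set(trial[0])) == condition
                                              for trial in yucorpus]))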
    
Set up world and corpus.
In [14]:
    
w = World(n_words=18, n_objs=18)
c = Corpus(w,
           n_per_sent=condition,
           n_sents=num_trials)        
c.sents = yucorpus
c.update()
    
Define model parameters.
In [15]:
    
p = Params(n_samps=200,
           n_particles=1,
           alpha_r=.1,
           alpha_nr=1,
           empty_intent=.0001,
           n_hypermoves=5)
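
Note that these values differ from the ME simulations above (larger alpha_r and alpha_nr, much smaller empty_intent); we take them as given for this training regime.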
    
Do inference with the Gibbs sampler.
In [16]:
    
l = Lexicon(c, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c, p)
    
    
Plot.
In [17]:
    
l.plot_lex(w, certainwords = 0)
    
    
Evaluate the posterior using Luce choice. At test, learners were given a 4AFC task (1 correct referent and 3 foils).
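As in the 2AFC case, each word's score is a Luce ratio, lc[i] = correct / (correct + the summed scores of the three foils), and the overall choice score averages lc over words.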
In [18]:
    
epsilon = .00001
l.ref = l.ref + epsilon  # smooth scores so Luce ratios are well-defined
num_foils = 3
lc = zeros(num_words)
correct = zeros(num_words)
incorrect = zeros(num_words)
for i in range(num_words):
    # draw three distinct foils, none of which is the target
    foils = sample(xrange(num_words), num_foils)
    while i in foils:
        foils = sample(xrange(num_words), num_foils)

    correct[i] = l.ref[i, i]
    # summed scores of the three foils for word i
    incorrect[i] = l.ref[foils[0], i] + l.ref[foils[1], i] + l.ref[foils[2], i]
    lc[i] = divide(correct[i], correct[i] + incorrect[i])

choice_score = sum(lc) / num_words
choice_score
    
Plot choice scores for one run of the model.
In [ ]:
    
from matplotlib import pyplot as plt
x = arange(3)
# choice scores for each condition, hard-coded from model runs
y = array([0.734509062, 0.601131376, 0.488957579])
f = plt.figure()
ax = f.add_axes([0.1, 0.1, .8, .8])
ax.bar(x, y, align='center')
ax.set_xticks(x)
ax.set_xticklabels(['2x2', '3x3', '4x4'])
plt.axhline(.25, color="black", linestyle='dashed', linewidth=2)  # 4AFC chance level
plt.ylim([0, 1])
ax.set_title('Gibbs sampler')
plt.show()
    
Create toy corpus with arbitrary number of words and objects (n=8).
In [51]:
    
# build world
w = World(n_words=8, n_objs=8)
w.show()
# build corpus
c_base = Corpus(world=w, n_sents=20, n_per_sent=1)
c_base.show()
    
    
Do inference over this corpus.
In [52]:
    
p = Params(n_samps=20,
           n_particles=1,
           alpha_r=.01,
           alpha_nr=1,
           empty_intent=.1,
           n_hypermoves=5)
# baseline: posterior before seeing any experimental evidence
l = Lexicon(c_base, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c_base, p)
base_scores = l.sample_scores
[base_scores]  # display the per-sample posterior scores
    
    
Now, add evidence from each of the four experimental conditions and compare each posterior to the baseline computed above.
(1) TWO words, TWO objects
In [53]:
    
w = World(n_words=8, n_objs=8)
w.show()
w.update(2, labels = ["fep", "toma"])
c_2w2o = copy.deepcopy(c_base)
c_2w2o.sents.append([array([8, 9]),array([8])])
c_2w2o.sents.append([array([8, 9]),array([9])])
c_2w2o.update(w)
c_2w2o.show()
l = Lexicon(c_2w2o, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c_2w2o, p)
print [l.sample_scores]
scores_2w2o = l.sample_scores
    
    
(2) ONE word, ONE object
In [54]:
    
c_1w1o = copy.deepcopy(c_base)
c_1w1o.sents.append([array([8, 8]),array([8])])
c_1w1o.sents.append([array([8, 8]),array([8])])
c_1w1o.update(w)
l = Lexicon(c_1w1o, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c_1w1o, p)
scores_1w1o = l.sample_scores
    
    
(3) ONE word, TWO objects
In [55]:
    
c_1w2o = copy.deepcopy(c_base)
c_1w2o.sents.append([array([8, 9]),array([8])])
c_1w2o.sents.append([array([8, 9]),array([8])])
c_1w2o.update(w)
c_1w2o.show()
l = Lexicon(c_1w2o, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c_1w2o, p)
print [l.sample_scores]
scores_1w2o = l.sample_scores
    
    
(4) TWO words, ONE object
In [57]:
    
c_2w1o = copy.deepcopy(c_base)
c_2w1o.sents.append([array([8, 8]),array([8])])
c_2w1o.sents.append([array([8, 8]),array([9])])
c_2w1o.update(w)
l = Lexicon(c_2w1o, p,
            verbose=0,
            hyper_inf=True)
l.learn_lex_gibbs(c_2w1o, p)
scores_2w1o = l.sample_scores
    
    
Compare posterior scores across conditions.
In [89]:
    
# take the maximum posterior score for each experimental condition
scores = (max(scores_1w1o),
          max(scores_1w2o),
          max(scores_2w1o),
          max(scores_2w2o))
dif_scores = max(base_scores) - np.array(scores)
conditions = ('1w1o', '1w2o', '2w1o', '2w2o')
zip(conditions, dif_scores)
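
(Assuming the sample scores are log posterior probabilities, a larger difference score means the appended evidence is less expected under the model.)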
    
Plot the difference between the baseline posterior and the posterior after observing evidence, for each experimental condition.
In [93]:
    
fig, ax = plt.subplots()
index = np.arange(4)
bar_width = 0.8
rects1 = plt.bar(index + bar_width, dif_scores, bar_width,
                 alpha=.5,
                 color='r')
plt.xlabel('Condition')
plt.ylabel('Posterior difference score')
plt.title('Posterior difference scores by condition')
plt.xticks(index + bar_width + (bar_width / 2), conditions)
plt.tight_layout()
plt.show()