In principle, this should work on a computer without a GPU. It will help if you have a lot of RAM.
Download http://grzegorz.chrupala.me/data/coco.zip and unzip it in the reimaginet/data/coco directory.
You should have the following files:
dataset.json - MSCOCO sentences
vgg_feats.mat - MSCOCO image vectors
dataset.ipa.jsonl.gz - IPA transcriptions of MSCOCO sentences

(Copy pretrained models from yellow.uvt.nl:/home/gchrupala/repos/reimaginet/examples/vis and yellow.uvt.nl:/home/gchrupala/repos/reimaginet/examples/vis/lm)
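If you prefer to do the download and unpacking from Python, here is a minimal sketch. It assumes the archive contains the files listed above at its top level and that you run it from the directory containing reimaginet:

import os, zipfile, urllib

dest = "reimaginet/data/coco"
if not os.path.exists(os.path.join(dest, "dataset.json")):
    # Fetch the archive and unpack it into the expected data directory
    urllib.urlretrieve("http://grzegorz.chrupala.me/data/coco.zip", "coco.zip")
    with zipfile.ZipFile("coco.zip") as z:
        z.extractall(dest)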
In [1]:
    
import imaginet.defn.visual as visual
import imaginet.task
    
    
    
In [2]:
    
model = imaginet.task.load(path="vis/model.10.zip")
    
    
In [8]:
    
reload(visual)
emb = visual.embeddings(model)
print(emb.shape)
    
    
The table of IPA symbols corresponding to the 49 rows of the embedding matrix:
In [9]:
    
symb = visual.symbols(model)
print " ".join(symb.values())
    
    
Let's display the embeddings projected to 2D via PCA
In [22]:
    
%pylab inline
    
    
In [23]:
    
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xy = pca.fit_transform(emb)
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
for j,symb_j in symb.items():
    if symb_j not in ["<BEG>","<END>", "<UNK>"]:
        pylab.text(xy[j,0], xy[j,1], symb_j)
    
    
In [24]:
    
import imaginet.defn.lm
reload(imaginet.defn.lm)
model2 = imaginet.task.load("lm/model.10.zip")
emb2 = imaginet.defn.lm.embeddings(model2)
    
In [25]:
    
pca2 = PCA(n_components=2)
xy = pca2.fit_transform(emb2)
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
for j,symb_j in symb.items(): 
    if symb_j not in ["<BEG>","<END>", "<UNK>"]:
        pylab.text(xy[j,0], xy[j,1], symb_j)
    
    
There is clear structure in the embeddings for the LM model.
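As a rough way to quantify how similarly the two models arrange the phoneme inventory, we can correlate their pairwise cosine distance matrices. This is only a sketch and assumes both models use the same symbol indexing (as the reuse of symb above suggests):

import numpy
from scipy.spatial.distance import cdist

# Pairwise cosine distances among phoneme embeddings in each space
d_vis = cdist(emb, emb, metric='cosine')
d_lm = cdist(emb2, emb2, metric='cosine')

# Correlation of the upper triangles: higher values mean the two models
# place the phonemes in similar relative positions
iu = numpy.triu_indices_from(d_vis, k=1)
print numpy.corrcoef(d_vis[iu], d_lm[iu])[0,1]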
In [7]:
    
from imaginet.data_provider import getDataProvider
# Adjust the root to point to the directory above data
prov = getDataProvider('coco', root="..")
    
In [8]:
    
sents = list(prov.iterSentences(split='val'))
    
In [9]:
    
from imaginet.simple_data import phonemes
# Phoneme (IPA) representation of each sentence
sents_ipa = [ phonemes(sent) for sent in sents ]
    
In [10]:
    
reps = imaginet.task.representation(model, sents_ipa)
    
In [30]:
    
from scipy.spatial.distance import cdist
distance = cdist(reps, reps, metric='cosine')
    
In [31]:
    
import numpy
def neighbors(k, distance=distance, size=5):
    # Indices of the nearest other sentences (position 0 is the sentence itself)
    nn = numpy.argsort(distance[k,:])[1:size]
    print sents[k]['raw'], ''.join(sents_ipa[k])
    for n in nn:
        # ✔ marks a neighbor describing the same image as sentence k
        print u"✔" if sents[n]['imgid']==sents[k]['imgid'] else u"✘", \
        sents[n]['raw'], ''.join(sents_ipa[n])
    
In [32]:
    
import random
    
In [33]:
    
random.seed(41)
for _ in range(10):
    neighbors(random.randint(0, len(sents)-1), distance=distance)
    print
    
    
In [36]:
    
reload(visual)
# Keep only the last element (final time step) of each sentence's piled
# activations, processing the sentences in chunks to limit memory use
reps2 = []
for i in range(0,len(sents_ipa),512):
    r = [ ri[-1] for ri in imaginet.task.pile(model, sents_ipa[i:i+512], batch_size=256) ]
    reps2.extend(r)
    
In [37]:
    
reps2 = numpy.array(reps2)
distance1 = cdist(reps2[:,1,:], reps2[:,1,:], metric='cosine')
distance0 = cdist(reps2[:,0,:], reps2[:,0,:], metric='cosine')
    
In [38]:
    
# Free the memory used by the raw per-layer activations
%reset_selective -f reps2
    
    
In [39]:
    
random.seed(41)
for _ in range(10):
    i = random.randint(0, len(sents)-1)
    print "Layer 1"
    neighbors(i, distance=distance0, size=2)
    print "Layer 2"
    neighbors(i, distance=distance1, size=2)
    print "Layer 3"
    neighbors(i, distance=distance, size=2)
    print
    
    
In [11]:
    
import imaginet.tracer
    
In [12]:
    
tr = imaginet.tracer.Tracer()
    
In [13]:
    
tr.fit(reps)
    
    
In [14]:
    
tr.proj.explained_variance_
    
In [15]:
    
from subprocess import check_output
def espeak(words):
    # Transcribe an English string into IPA phonemes with the espeak TTS engine
    return phon(check_output(["espeak", "-q", "--ipa=3",
                        '-v', 'en',
                        words]).decode('utf-8'))
def phon(inp):
    # Split espeak's output into individual phonemes, dropping primary stress marks
    return [ ph.replace(u"ˈ","") for word in inp.split() for ph in word.split("_") ]
    
In [19]:
    
%pylab inline --no-import-all
def trace(orths, tracer=tr, model=model, eos=True, size=(6,6)):
    # Transcribe the sentences to IPA, run them through the model,
    # and plot the trajectories of the hidden states with the tracer
    ipas = [ espeak(orth) for orth in orths ]
    states = imaginet.task.states(model, ipas)
    pylab.figure(figsize=size)
    tracer.traces(ipas, states, eos=eos)
    
    
In [20]:
    
trace(["A bowl of salad","A plate of pizza","A brown dog", "A black cat"])
    
    
In [21]:
    
trace(["a cow", "a baby cow","a tiny baby cow"])
    
    
In [25]:
    
orths = ["A cow","A baby","A tiny"]
ipas = [ espeak(orth) for orth in orths ]
states = imaginet.task.states(model, ipas)
states[0][0] - states[1][0]
    
In [23]:
    
trace(["some food on a table","a computer on a table","a table with food"])
pylab.axis('off')
    
    
In [24]:
    
trace(["a bear in a cage", "a brown bear in the zoo","a teddy bear on a chair"])
    
    