In principle, this should work on a computer without a GPU. It will help if you have a lot of RAM.
Download http://grzegorz.chrupala.me/data/coco.zip and unzip it in the reimaginet/data/coco directory.
You should have the following files:
dataset.json — MSCOCO sentences; vgg_feats.mat — MSCOCO image vectors; dataset.ipa.jsonl.gz — IPA transcriptions of MSCOCO sentences. Then download http://grzegorz.chrupala.me/data/model-ipa.zip and put it in the examples directory (the same directory as this notebook).
In [1]:
    
import imaginet.task
    
    
    
In [2]:
    
# Load the pretrained phoneme-level imaginet model from the zip bundle
# (model-ipa.zip, downloaded as described above).
model = imaginet.task.load(path="model-ipa.zip")
    
    
In [3]:
    
# Extract the learned input-symbol embedding matrix from the model:
# one row per IPA symbol (see the symbol table below).
emb = imaginet.task.embeddings(model)
print(emb.shape)
    
    
The table of IPA symbols corresponding to the 49 dimensions
In [5]:
    
# Map each embedding row index to its IPA symbol and display the inventory.
symb = imaginet.task.symbols(model)
# Parenthesized print: identical output under Python 2 (parens just group
# the single argument) and valid under Python 3, unlike the original
# Python 2-only `print` statement.
print(" ".join(symb.values()))
    
    
Let's display the embeddings projected to 2D via PCA
In [7]:
    
%pylab inline
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xy = pca.fit_transform(emb)
    
    
In [8]:
    
# Scatter the symbol embeddings in PCA space and label each point with its
# IPA glyph (DejaVu Sans is selected because it covers the IPA range).
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
for idx, glyph in symb.items():
    pylab.text(xy[idx, 0], xy[idx, 1], glyph)
    
    
    
Seems mostly random...
In [9]:
    
from imaginet.data_provider import getDataProvider
# Adjust the root to point to the directory above data
# (expects the unzipped MSCOCO files under <root>/data/coco).
prov = getDataProvider('coco', root="..")
    
In [10]:
    
# Materialize all validation-split sentences into a list so they can be
# indexed repeatedly below (each item carries at least 'raw' and 'imgid').
sents = list(prov.iterSentences(split='val'))
    
In [11]:
    
from imaginet.simple_data import phonemes
# Transcribe every validation sentence into its IPA phoneme sequence.
sents_ipa = list(map(phonemes, sents))
    
In [12]:
    
# Compute the model's sentence representation for every IPA transcription.
reps = imaginet.task.representation(model, sents_ipa)
    
In [13]:
    
from scipy.spatial.distance import cdist
# Full pairwise cosine-distance matrix between sentence representations:
# distance[i, j] is the cosine distance between sentences i and j.
distance = cdist(reps, reps, metric='cosine')
    
In [14]:
    
import numpy
def neighbors(k, distance=distance):
    # Print sentence k and its 4 nearest neighbors by cosine distance.
    # NOTE: `distance=distance` binds the matrix at definition time; re-run
    # this cell if `distance` is recomputed.
    # argsort orders indices by increasing distance; [1:5] skips position 0
    # (the sentence itself) and keeps the next 4.
    nn =  numpy.argsort(distance[k,:])[1:5]
    # Python 2 print statements: show the query, then each neighbor marked
    # with ✔ if it describes the same image as the query, ✘ otherwise.
    print sents[k]['raw'], ''.join(sents_ipa[k])
    for n in nn:
        print u"✔" if sents[n]['imgid']==sents[k]['imgid'] else u"✘", \
        sents[n]['raw'], ''.join(sents_ipa[n])
    
In [15]:
    
import random
    
In [16]:
    
# Show nearest neighbors for 10 randomly chosen validation sentences.
for _ in range(10):
    # Bug fix: randint(0, len(sents)) is inclusive at BOTH ends, so it could
    # return len(sents) and raise IndexError; randrange excludes the upper
    # bound.
    neighbors(random.randrange(len(sents)))
    # Blank separator line; print("") behaves the same under Python 2 and 3,
    # unlike the bare Python 2 `print` statement.
    print("")
    
    
In [17]:
    
import imaginet.tracer
# Pick up any local edits to the tracer module without restarting the kernel
# (`reload` is a Python 2 builtin; under Python 3 use importlib.reload).
reload(imaginet.tracer)
    
    Out[17]:
In [18]:
    
# Build a tracer for visualizing hidden-state trajectories; it fits a
# projection on the representations (see tr.fit / tr.proj below).
tr = imaginet.tracer.Tracer()
    
In [19]:
    
# Fit the tracer's projection on the full set of sentence representations.
tr.fit(reps)
    
    
In [20]:
    
# Variance explained by each component of the tracer's fitted projection
# (bare last expression so the notebook displays it).
tr.proj.explained_variance_
    
    Out[20]:
In [21]:
    
from subprocess import check_output

def phon(inp):
    """Return the characters of `inp` as a list, with all whitespace removed."""
    return [ch for ch in inp if not ch.isspace()]

def espeak(words):
    """Transcribe `words` to a list of IPA characters via the espeak CLI."""
    ipa = check_output(["espeak", "-q", "--ipa",
                        '-v', 'en-us',
                        words]).decode('utf-8')
    return phon(ipa)
    
In [22]:
    
def trace(orths, tracer=tr, model=model, eos=True, size=(6,6)):
    """Plot hidden-state trajectories for a list of orthographic phrases.

    Each phrase is transcribed to IPA with espeak, run through the model to
    get per-symbol states, and drawn by the tracer on a fresh figure.
    NOTE: tracer/model defaults are bound at definition time.
    """
    ipas = [espeak(phrase) for phrase in orths]
    states = [imaginet.task.states(model, seq) for seq in ipas]
    pylab.figure(figsize=size)
    tracer.traces(ipas, states, eos=eos)
    
In [23]:
    
# Trajectories for two food phrases vs. two animal phrases.
trace(["A bowl of salad","A plate of pizza","A brown dog", "A black cat"])
    
    
In [24]:
    
# Near-minimal pairs: same subject, different activities.
trace(["a girl skiing", "a girl wind surfing", "a girl water skiing",])
    
    
In [25]:
    
# Progressively longer noun phrases around the same head noun.
trace(["a cow", "a baby cow","a tiny baby cow"])
    
    
In [26]:
    
# Phrases sharing the same words in different orders; hide the axes for a
# cleaner plot (axis() returns the limits tuple, shown as Out[26]).
trace(["some food on a table","a computer on a table","a table with food"])
pylab.axis('off')
    
    Out[26]:
    
In [42]:
    
# Different senses of "bear": live animal vs. teddy bear.
trace(["a bear in a cage", "a brown bear in the zoo","a teddy bear on a chair"])
    
    
In [ ]: